Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 45 additions & 1 deletion services/api/platform_auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ const (
apiLoginLockoutBase = 15 * time.Second
apiLoginLockoutMax = 5 * time.Minute
)
const (
apiLoginAttemptIdleTTL = 30 * time.Minute
apiLoginAttemptMaxEntries = 4096
)
const (
platformSignupRequestMaxBytes = 4 * 1024
platformPasswordLoginRequestMaxBytes = 4 * 1024
Expand All @@ -43,6 +47,7 @@ type passwordUserEnsurer interface {
type apiLoginAttempt struct {
failures int
lockedUntil time.Time
lastSeen time.Time
}

type apiLoginAttemptTracker struct {
Expand All @@ -61,8 +66,14 @@ func newAPILoginAttemptTracker(nowFn func() time.Time) *apiLoginAttemptTracker {
func (t *apiLoginAttemptTracker) allow(key string) bool {
t.mu.Lock()
defer t.mu.Unlock()
state := t.entries[key]
now := t.nowFunc()
t.pruneLocked(now)
state, ok := t.entries[key]
if !ok {
return true
}
state.lastSeen = now
t.entries[key] = state
if state.lockedUntil.IsZero() || !state.lockedUntil.After(now) {
return true
}
Expand All @@ -73,22 +84,55 @@ func (t *apiLoginAttemptTracker) recordFailure(key string) int {
t.mu.Lock()
defer t.mu.Unlock()
now := t.nowFunc()
t.pruneLocked(now)
state := t.entries[key]
state.failures++
state.lockedUntil = now.Add(lockoutDurationForFailures(state.failures))
state.lastSeen = now
t.entries[key] = state
t.enforceMaxLocked()
return state.failures
}

func (t *apiLoginAttemptTracker) recordSuccess(key string) int {
t.mu.Lock()
defer t.mu.Unlock()
now := t.nowFunc()
t.pruneLocked(now)
state := t.entries[key]
failures := state.failures
delete(t.entries, key)
return failures
}

func (t *apiLoginAttemptTracker) pruneLocked(now time.Time) {
if apiLoginAttemptIdleTTL <= 0 {
return
}
for key, state := range t.entries {
if state.lastSeen.IsZero() || (now.Sub(state.lastSeen) > apiLoginAttemptIdleTTL && !state.lockedUntil.After(now)) {
delete(t.entries, key)
}
}
}
Comment on lines +108 to +117

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The pruneLocked method iterates over the entire entries map on every call to allow, recordFailure, and recordSuccess. With a default limit of 4096 entries, this O(N) operation per request introduces significant CPU overhead, especially under high load or during a brute-force attack. This could be exploited as a CPU exhaustion vector.

Consider performing pruning periodically (e.g., using a background goroutine) or only when the map size exceeds a certain threshold to decouple this maintenance from the request path.


func (t *apiLoginAttemptTracker) enforceMaxLocked() {
for len(t.entries) > apiLoginAttemptMaxEntries {
var oldestKey string
var oldestSeen time.Time
for key, state := range t.entries {
if oldestKey == "" || state.lastSeen.Before(oldestSeen) {
oldestKey = key
oldestSeen = state.lastSeen
}
Comment on lines +123 to +127

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Avoid evicting active API login lockouts under map pressure

The API attempt cap also evicts solely by oldest lastSeen, regardless of whether an entry is still locked. After the map reaches 4096 keys, additional failed logins on other keys can evict a currently locked ip|email entry, allowing immediate retries for that principal and reducing the effectiveness of password-bruteforce throttling.

Useful? React with 👍 / 👎.

}
if oldestKey == "" {
return
}
delete(t.entries, oldestKey)
}
}
Comment on lines +119 to +134

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The enforceMaxLocked method performs a full scan of the map to find the oldest entry, resulting in O(N) complexity. While it is only executed when the map exceeds its limit, an attacker could intentionally trigger this scan repeatedly by rotating keys. For better scalability and resistance to DoS, consider using a data structure that supports O(1) eviction, such as a doubly linked list combined with the map (LRU cache pattern).


func lockoutDurationForFailures(failures int) time.Duration {
if failures <= 2 {
return 0
Expand Down
38 changes: 38 additions & 0 deletions services/api/platform_auth_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"context"
"errors"
"fmt"
"testing"
"time"
)
Expand Down Expand Up @@ -99,3 +100,40 @@ func TestOpenPlatformStoreWithRetryRetriesUntilSuccess(t *testing.T) {
t.Fatalf("open attempts = %d, want 2", calls)
}
}

func TestAPILoginAttemptTrackerPrunesIdleEntries(t *testing.T) {
now := time.Unix(1_700_000_000, 0)
tracker := newAPILoginAttemptTracker(func() time.Time {
return now
})

tracker.recordFailure("client-old")
now = now.Add(apiLoginAttemptIdleTTL + time.Second)
tracker.recordFailure("client-new")

if _, ok := tracker.entries["client-old"]; ok {
t.Fatal("idle login attempt entry was not pruned")
}
if _, ok := tracker.entries["client-new"]; !ok {
t.Fatal("new login attempt entry missing")
}
}

func TestAPILoginAttemptTrackerCapsRetainedEntries(t *testing.T) {
now := time.Unix(1_700_000_000, 0)
tracker := newAPILoginAttemptTracker(func() time.Time {
return now
})

for i := 0; i < apiLoginAttemptMaxEntries+1; i++ {
tracker.recordFailure(fmt.Sprintf("client-%d", i))
now = now.Add(time.Millisecond)
}

if got := len(tracker.entries); got != apiLoginAttemptMaxEntries {
t.Fatalf("login attempt entries = %d, want %d", got, apiLoginAttemptMaxEntries)
}
if _, ok := tracker.entries["client-0"]; ok {
t.Fatal("oldest login attempt entry was not evicted")
}
}
6 changes: 5 additions & 1 deletion services/mcp-gateway/analytics.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ func (s *gatewayServer) emitIfEnabled(ctx context.Context, event events.Envelope
return
}
s.analyticsMu.Lock()
defer s.analyticsMu.Unlock()
if s.analyticsClosed {
s.analyticsMu.Unlock()
return
}
item := analyticsEvent{
Expand All @@ -82,7 +82,11 @@ func (s *gatewayServer) emitIfEnabled(ctx context.Context, event events.Envelope
}
select {
case queue <- item:
s.analyticsMu.Unlock()
default:
dropped := s.analyticsDropped.Add(1)
s.analyticsMu.Unlock()
log.Printf("gateway analytics queue full; dropped event total=%d source=%q event_type=%q", dropped, event.Source, event.EventType)
}
}

Expand Down
3 changes: 3 additions & 0 deletions services/mcp-gateway/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,9 @@ func TestEmitIfEnabledDropsWhenQueueIsFull(t *testing.T) {
default:
t.Fatal("analytics queue unexpectedly drained")
}
if got := proxy.analyticsDropped.Load(); got != 1 {
t.Fatalf("analytics dropped count = %d, want 1", got)
}
}

func TestStopAnalyticsDispatcherDrainsQueue(t *testing.T) {
Expand Down
1 change: 1 addition & 0 deletions services/mcp-gateway/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ type gatewayServer struct {
analyticsOnce sync.Once
analyticsWG sync.WaitGroup
analyticsClosed bool
analyticsDropped atomic.Uint64
oauthMu sync.Mutex
oauthProviders map[string]*oauthProvider
policyState atomic.Value
Expand Down
54 changes: 48 additions & 6 deletions services/ui/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ const (
defaultLoginFailureWindow = 15 * time.Minute
defaultLoginFailureThreshold = 5
defaultLoginLockoutDuration = 5 * time.Minute
loginAttemptIdleTTL = 30 * time.Minute
loginAttemptMaxClients = 4096
loginFailureLogEvery = 3
loginRequestMaxBytes = 8 * 1024
)
Expand Down Expand Up @@ -84,6 +86,7 @@ type loginAttemptTracker struct {
type loginClientState struct {
tokens int
lastRefill time.Time
lastSeen time.Time
failures int
failuresExpire time.Time
lockedUntil time.Time
Expand Down Expand Up @@ -721,8 +724,10 @@ func (t *loginAttemptTracker) allow(clientID string) bool {
t.mu.Lock()
defer t.mu.Unlock()

state := t.stateForLocked(clientID)
now := t.now()
t.pruneLocked(now)
state := t.stateForLocked(clientID, now)
state.lastSeen = now
refillLoginTokens(state, now)
if now.Before(state.lockedUntil) {
return false
Expand All @@ -731,15 +736,18 @@ func (t *loginAttemptTracker) allow(clientID string) bool {
return false
}
state.tokens--
t.enforceMaxLocked()
return true
}

func (t *loginAttemptTracker) recordFailure(clientID string) int {
t.mu.Lock()
defer t.mu.Unlock()

state := t.stateForLocked(clientID)
now := t.now()
t.pruneLocked(now)
state := t.stateForLocked(clientID, now)
state.lastSeen = now
if now.After(state.failuresExpire) {
state.failures = 0
}
Expand All @@ -748,31 +756,65 @@ func (t *loginAttemptTracker) recordFailure(clientID string) int {
if state.failures >= loginFailureThreshold {
state.lockedUntil = now.Add(loginLockoutDuration)
}
t.enforceMaxLocked()
return state.failures
}

func (t *loginAttemptTracker) recordSuccess(clientID string) int {
t.mu.Lock()
defer t.mu.Unlock()

state := t.stateForLocked(clientID)
now := t.now()
t.pruneLocked(now)
state := t.clients[clientID]
if state == nil {
return 0
}
state.lastSeen = now
prior := state.failures
state.failures = 0
state.failuresExpire = time.Time{}
state.lockedUntil = time.Time{}
return prior
}

func (t *loginAttemptTracker) stateForLocked(clientID string) *loginClientState {
func (t *loginAttemptTracker) stateForLocked(clientID string, now time.Time) *loginClientState {
state := t.clients[clientID]
if state == nil {
now := t.now()
state = &loginClientState{tokens: loginRateLimitCapacity, lastRefill: now}
state = &loginClientState{tokens: loginRateLimitCapacity, lastRefill: now, lastSeen: now}
t.clients[clientID] = state
}
return state
}

func (t *loginAttemptTracker) pruneLocked(now time.Time) {
if loginAttemptIdleTTL <= 0 {
return
}
for clientID, state := range t.clients {
if state.lastSeen.IsZero() || (now.Sub(state.lastSeen) > loginAttemptIdleTTL && !now.Before(state.lockedUntil)) {
delete(t.clients, clientID)
}
}
}
Comment on lines +790 to +799

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Similar to the API service, pruneLocked performs an O(N) iteration over the clients map on every request to the login flow. This is inefficient and presents a performance bottleneck. Pruning should ideally be decoupled from the request path or performed less frequently (e.g., probabilistically or via a background task).


func (t *loginAttemptTracker) enforceMaxLocked() {
for len(t.clients) > loginAttemptMaxClients {
var oldestClientID string
var oldestSeen time.Time
for clientID, state := range t.clients {
if oldestClientID == "" || state.lastSeen.Before(oldestSeen) {
oldestClientID = clientID
oldestSeen = state.lastSeen
}
Comment on lines +805 to +809

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Preserve locked clients when enforcing UI tracker cap

The eviction policy removes whichever client has the oldest lastSeen without checking whether that client is currently locked out. Because enforceMaxLocked() is called from allow() and recordFailure(), once more than 4096 distinct client IDs are seen in a 30-minute window, active lockouts can be evicted and the same attacker can continue login attempts immediately. This weakens the lockout control under high-cardinality or intentionally churned client IDs.

Useful? React with 👍 / 👎.

}
if oldestClientID == "" {
return
}
delete(t.clients, oldestClientID)
}
}

func refillLoginTokens(state *loginClientState, now time.Time) {
if state.lastRefill.IsZero() {
state.lastRefill = now
Expand Down
39 changes: 39 additions & 0 deletions services/ui/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -982,6 +982,45 @@ func TestHandleLoginSuccessResetsFailureCounter(t *testing.T) {
}
}

func TestLoginAttemptTrackerPrunesIdleClients(t *testing.T) {
now := time.Unix(1_700_000_000, 0)
tracker := newLoginAttemptTracker(func() time.Time {
return now
})

tracker.recordFailure("client-old")
now = now.Add(loginAttemptIdleTTL + time.Second)
tracker.recordFailure("client-new")

if _, ok := tracker.clients["client-old"]; ok {
t.Fatal("idle login attempt client was not pruned")
}
if _, ok := tracker.clients["client-new"]; !ok {
t.Fatal("new login attempt client missing")
}
}

func TestLoginAttemptTrackerCapsRetainedClients(t *testing.T) {
now := time.Unix(1_700_000_000, 0)
tracker := newLoginAttemptTracker(func() time.Time {
return now
})

for i := 0; i < loginAttemptMaxClients+1; i++ {
if !tracker.allow(fmt.Sprintf("client-%d", i)) {
t.Fatalf("client-%d should be allowed on first attempt", i)
}
now = now.Add(time.Millisecond)
}

if got := len(tracker.clients); got != loginAttemptMaxClients {
t.Fatalf("login attempt clients = %d, want %d", got, loginAttemptMaxClients)
}
if _, ok := tracker.clients["client-0"]; ok {
t.Fatal("oldest login attempt client was not evicted")
}
}

func useLoginAttemptTrackerForTest(t *testing.T) func() {
t.Helper()
previous := loginAttempts
Expand Down
Loading