diff --git a/pkg/client/client.go b/pkg/client/client.go index a45fd75e..8f0979c4 100644 --- a/pkg/client/client.go +++ b/pkg/client/client.go @@ -150,8 +150,17 @@ func New(options *Options) (*Client, error) { secretKey = options.SessionInfo.SecretKey token = options.SessionInfo.Token } else { - // Generate a random ksuid which will be used as server secret. - correlationID = xid.New().String() + // Use a privacy-preserving xid: keep the 4-byte timestamp prefix so the id + // stays k-ordered and parseable, but randomize the trailing 8 bytes + // (machine + pid + counter). Plain xid.New() leaks a stable per-machine + // fingerprint (md5(hostname)[:3]) through every OAST callback, which is + // observable by the target service and by third-party telemetry that + // logs OAST traffic (see issue #1349). + anonID, err := newAnonymousCorrelationID() + if err != nil { + return nil, errkit.Wrap(err, "could not generate correlation id") + } + correlationID = anonID if len(correlationID) > options.CorrelationIdLength { correlationID = correlationID[:options.CorrelationIdLength] } @@ -721,3 +730,18 @@ func (c *Client) SaveSessionTo(filename string) error { } return os.WriteFile(filename, data, os.ModePerm) } + +// newAnonymousCorrelationID returns an xid-formatted correlation id that +// preserves the 4-byte timestamp prefix (for k-ordering and the server-side +// xidAlphabet validator) and replaces the remaining 8 bytes (machine, pid, +// counter) with crypto/rand. The result is still a valid xid string, still +// 20 chars from the [0-9a-v] alphabet, and still parseable with xid.FromString. +func newAnonymousCorrelationID() (string, error) { + id := xid.NewWithTime(time.Now()) + // Overwrite machine (4..6), pid (7..8), counter (9..11) with random bytes. + // id is xid.ID ([12]byte) and id[4:] aliases the same backing array. + if _, err := rand.Read(id[4:]); err != nil { + return "", err + } + return id.String(), nil +} diff --git a/pkg/client/correlation_id_test.go b/pkg/client/correlation_id_test.go new file mode 100644 index 00000000..c09a448c --- /dev/null +++ b/pkg/client/correlation_id_test.go @@ -0,0 +1,75 @@ +package client + +import ( + "regexp" + "testing" + "time" + + "github.com/rs/xid" + "github.com/stretchr/testify/require" +) + +// xidAlphabet matches the server-side validator (see pkg/server/util.go). +var xidAlphabet = regexp.MustCompile(`^[0-9a-v]{20}$`) + +func TestAnonymousCorrelationIDFormat(t *testing.T) { + id, err := newAnonymousCorrelationID() + require.NoError(t, err) + require.Len(t, id, 20) + require.Regexp(t, xidAlphabet, id, "must match server xid alphabet validator") + + parsed, err := xid.FromString(id) + require.NoError(t, err, "must remain parseable as xid") + require.WithinDuration(t, time.Now(), parsed.Time(), 5*time.Second, + "timestamp prefix must encode the current time") +} + +// TestAnonymousCorrelationIDNotFingerprinted asserts that the machine and pid +// bytes are randomised: xid.New() returns the same machine+pid for every call +// in the same process; newAnonymousCorrelationID() must not. +func TestAnonymousCorrelationIDNotFingerprinted(t *testing.T) { + const samples = 64 + + machineSet := make(map[string]struct{}, samples) + pidSet := make(map[uint16]struct{}, samples) + + for range samples { + raw, err := newAnonymousCorrelationID() + require.NoError(t, err) + id, err := xid.FromString(raw) + require.NoError(t, err) + machineSet[string(id.Machine())] = struct{}{} + pidSet[id.Pid()] = struct{}{} + } + + // With 64 samples and full randomness, the probability of getting a + // single repeated 3-byte machine value is ~64^2 / 2^25 ≈ 1.2e-4. A + // fingerprinted id would collapse to a single value, so even a weak + // lower bound here catches the regression. + require.Greater(t, len(machineSet), samples/2, + "machine bytes must be randomised across calls") + require.Greater(t, len(pidSet), samples/2, + "pid bytes must be randomised across calls") +} + +func TestAnonymousCorrelationIDDistinctFromXidNew(t *testing.T) { + // xid.New() inherits a stable machine fingerprint per process; the + // anonymous helper must not. + stable := xid.New().Machine() + collisions := 0 + const samples = 32 + for range samples { + raw, err := newAnonymousCorrelationID() + require.NoError(t, err) + id, err := xid.FromString(raw) + require.NoError(t, err) + if string(id.Machine()) == string(stable) { + collisions++ + } + } + // A truly fingerprinted helper would collide on every sample. Allow a + // generous threshold so the test stays stable against random chance + // (expected collisions ≈ samples / 2^24, i.e. effectively zero). + require.Less(t, collisions, samples/4, + "anonymous helper must not reuse xid.New()'s machine fingerprint") +}