diff --git a/app/app.go b/app/app.go index f9ea53c719..a85334ab45 100644 --- a/app/app.go +++ b/app/app.go @@ -323,8 +323,8 @@ func Run(ctx context.Context, conf Config) (err error) { consensusDebugger := consensus.NewDebugger() - wireMonitoringAPI(ctx, life, conf.MonitoringAddr, conf.DebugAddr, p2pNode, eth2Cl, conf.BeaconNodeAddrs, peerIDs, - promRegistry, consensusDebugger, pubkeys, vapiCalls, len(lock.Validators)) + wireMonitoringAPI(ctx, life, conf.MonitoringAddr, conf.DebugAddr, p2pNode, eth2Cl, conf.BeaconNodeAddrs, eth1Cl, + peerIDs, promRegistry, consensusDebugger, pubkeys, vapiCalls, len(lock.Validators)) err = wireCoreWorkflow(ctx, life, conf, lock, nodeIdx, p2pNode, p2pKey, eth2Cl, subEth2Cl, peerIDs, sender, consensusDebugger, pubkeys, sseListener, vapiCallsFunc) diff --git a/app/eth1wrap/interface.go b/app/eth1wrap/interface.go index 6add288ed0..9e1b076acf 100644 --- a/app/eth1wrap/interface.go +++ b/app/eth1wrap/interface.go @@ -72,6 +72,7 @@ type EthClientFactoryFn func(ctx context.Context, rawurl string) (EthClient, err type EthClientRunner interface { Run(ctx context.Context) VerifySmartContractBasedSignature(contractAddress string, hash [32]byte, sig []byte) (bool, error) + ClientVersion(ctx context.Context) (string, error) } type Erc1271FactoryFn func(contractAddress string, client EthClient) (Erc1271, error) diff --git a/app/eth1wrap/mocks/eth_client_runner.go b/app/eth1wrap/mocks/eth_client_runner.go index b6ff99cab5..a7053f41d9 100644 --- a/app/eth1wrap/mocks/eth_client_runner.go +++ b/app/eth1wrap/mocks/eth_client_runner.go @@ -48,6 +48,34 @@ func (_m *EthClientRunner) VerifySmartContractBasedSignature(contractAddress str return r0, r1 } +// ClientVersion provides a mock function with given fields: ctx +func (_m *EthClientRunner) ClientVersion(ctx context.Context) (string, error) { + ret := _m.Called(ctx) + + if len(ret) == 0 { + panic("no return value specified for ClientVersion") + } + + var r0 string + var r1 error + if rf, ok := 
ret.Get(0).(func(context.Context) (string, error)); ok { + return rf(ctx) + } + if rf, ok := ret.Get(0).(func(context.Context) string); ok { + r0 = rf(ctx) + } else { + r0 = ret.Get(0).(string) + } + + if rf, ok := ret.Get(1).(func(context.Context) error); ok { + r1 = rf(ctx) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + // NewEthClientRunner creates a new instance of EthClientRunner. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. // The first argument is typically a *testing.T value. func NewEthClientRunner(t interface { diff --git a/app/eth1wrap/runner.go b/app/eth1wrap/runner.go index 7d179567fd..7cee8a0ec2 100644 --- a/app/eth1wrap/runner.go +++ b/app/eth1wrap/runner.go @@ -143,6 +143,28 @@ func (noopClient) VerifySmartContractBasedSignature(_ string, _ [32]byte, _ []by return false, ErrNoExecutionEngineAddr } +func (noopClient) ClientVersion(_ context.Context) (string, error) { + return "", ErrNoExecutionEngineAddr +} + +// ClientVersion returns the execution engine client version string via web3_clientVersion RPC. +func (cl *client) ClientVersion(ctx context.Context) (string, error) { + cl.Lock() + defer cl.Unlock() + + if cl.eth1client == nil { + return "", ErrEthClientNotConnected + } + + var ver string + if err := cl.eth1client.Client().CallContext(ctx, &ver, "web3_clientVersion"); err != nil { + cl.maybeReconnect() + return "", errors.Wrap(err, "get execution layer client version") + } + + return ver, nil +} + func (cl *client) maybeReconnect() { cl.reconnectCh <- struct{}{} } diff --git a/app/eth1wrap/version.go b/app/eth1wrap/version.go new file mode 100644 index 0000000000..4525936e85 --- /dev/null +++ b/app/eth1wrap/version.go @@ -0,0 +1,107 @@ +// Copyright © 2022-2026 Obol Labs Inc. 
Licensed under the terms of a Business Source License 1.1 + +package eth1wrap + +import ( + "context" + "regexp" + + "github.com/obolnetwork/charon/app/log" + "github.com/obolnetwork/charon/app/version" + "github.com/obolnetwork/charon/app/z" +) + +var ( + minGethVersion = mustParse("v1.16.7") + minNethermindVersion = mustParse("v1.35.0") + minBesuVersion = mustParse("v25.11.0") + minErigonVersion = mustParse("v3.2.2") + minRethVersion = mustParse("v1.9.1") + + minimumExecutionEngineVersion = map[string]version.SemVer{ + "Geth": minGethVersion, + "Nethermind": minNethermindVersion, + "besu": minBesuVersion, + "erigon": minErigonVersion, + "reth": minRethVersion, + } + + incompatibleExecutionEngineVersion = map[string][]version.SemVer{} +) + +func mustParse(v string) version.SemVer { + sv, err := version.Parse(v) + if err != nil { + panic(err) + } + + return sv +} + +type ExecutionEngineVersionStatus int + +const ( + ELVersionOK ExecutionEngineVersionStatus = iota + ELVersionFormatError + ELVersionUnknownClient + ELVersionTooOld + ELVersionIncompatible +) + +var elVersionExtractRegex = regexp.MustCompile(`^([^/]+)/v?([0-9]+\.[0-9]+\.[0-9]+)`) + +// checkExecutionEngineVersionStatus checks the version of the execution engine client against the minimum required version. 
+func checkExecutionEngineVersionStatus(elVersion string) (status ExecutionEngineVersionStatus, clVer string, minVer string) { + matches := elVersionExtractRegex.FindStringSubmatch(elVersion) + if len(matches) != 3 { + return ELVersionFormatError, "", "" + } + + client := matches[1] + + clientVersion, err := version.Parse("v" + matches[2]) + if err != nil { + return ELVersionFormatError, "", "" + } + + minVersion, ok := minimumExecutionEngineVersion[client] + if !ok { + return ELVersionUnknownClient, "", "" + } + + if version.Compare(clientVersion, minVersion) == -1 { + return ELVersionTooOld, clientVersion.String(), minVersion.String() + } + + for _, badVer := range incompatibleExecutionEngineVersion[client] { + if version.Compare(clientVersion, badVer) == 0 { + return ELVersionIncompatible, clientVersion.String(), "" + } + } + + return ELVersionOK, clientVersion.String(), minVersion.String() +} + +// CheckExecutionEngineVersion checks the version of the execution engine client and logs a warning +// if the version is below the minimum, incompatible, or the client is not recognized. +func CheckExecutionEngineVersion(ctx context.Context, elVersion string) { + status, currentVersion, minVersion := checkExecutionEngineVersionStatus(elVersion) + + //nolint:revive // enforce-switch-style: the list is exhaustive and there is no need for default + switch status { + case ELVersionFormatError: + log.Warn(ctx, "Failed to parse execution engine version string due to unexpected format. This may indicate an unsupported or custom execution engine build", + nil, z.Str("input", elVersion)) + case ELVersionUnknownClient: + log.Warn(ctx, "Unknown execution engine client detected. The client is not in the supported client list and may cause compatibility issues", + nil, z.Str("client", elVersion)) + case ELVersionTooOld: + log.Warn(ctx, "Execution engine client version is below the minimum supported version. 
Please upgrade your execution engine to ensure compatibility and security", + nil, z.Str("client_version", currentVersion), z.Str("minimum_required", minVersion)) + case ELVersionIncompatible: + log.Warn(ctx, "Execution engine client version is known to be incompatible with Charon. Please upgrade or downgrade your execution engine to a compatible version", + nil, z.Str("client_version", currentVersion)) + case ELVersionOK: + // Do nothing + } +} diff --git a/app/metrics.go b/app/metrics.go index f0efbe4541..0c1e691e2c 100644 --- a/app/metrics.go +++ b/app/metrics.go @@ -90,6 +90,13 @@ var ( Help: "Constant gauge with labels set to the version and beacon_id of the upstream beacon node", }, []string{"version", "beacon_id"}) + executionEngineVersionGauge = promauto.NewResetGaugeVec(prometheus.GaugeOpts{ + Namespace: "app", + Subsystem: "execution_layer", + Name: "version", + Help: "Constant gauge with labels set to the version of the upstream execution layer", + }, []string{"version"}) + thresholdGauge = promauto.NewGauge(prometheus.GaugeOpts{ Namespace: "cluster", Name: "threshold", diff --git a/app/monitoringapi.go b/app/monitoringapi.go index e1ba6b3c11..7b62961c0e 100644 --- a/app/monitoringapi.go +++ b/app/monitoringapi.go @@ -19,6 +19,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/obolnetwork/charon/app/errors" + "github.com/obolnetwork/charon/app/eth1wrap" "github.com/obolnetwork/charon/app/eth2wrap" "github.com/obolnetwork/charon/app/health" "github.com/obolnetwork/charon/app/lifecycle" @@ -45,12 +46,12 @@ var ( // wireMonitoringAPI constructs the monitoring API and registers it with the life cycle manager. // It serves prometheus metrics, pprof profiling and the runtime enr. 
func wireMonitoringAPI(ctx context.Context, life *lifecycle.Manager, promAddr, debugAddr string, - p2pNode host.Host, eth2Cl eth2wrap.Client, beaconNodeAddrs []string, + p2pNode host.Host, eth2Cl eth2wrap.Client, beaconNodeAddrs []string, eth1Cl eth1wrap.EthClientRunner, peerIDs []peer.ID, registry *prometheus.Registry, consensusDebugger http.Handler, pubkeys []core.PubKey, vapiCalls <-chan struct{}, numValidators int, ) { - beaconNodeVersionMetric(ctx, eth2Cl, beaconNodeAddrs, clockwork.NewRealClock()) + consensusAndExecutionVersionMetric(ctx, eth2Cl, beaconNodeAddrs, eth1Cl, clockwork.NewRealClock()) mux := http.NewServeMux() @@ -253,12 +254,13 @@ func beaconNodeSyncing(ctx context.Context, eth2Cl eth2client.NodeSyncingProvide return eth2Resp.Data.IsSyncing, eth2Resp.Data.SyncDistance, nil } -// beaconNodeVersionMetric sets the beacon node version gauge. -func beaconNodeVersionMetric(ctx context.Context, eth2Cl eth2wrap.Client, beaconNodeAddrs []string, clk clockwork.Clock) { +// consensusAndExecutionVersionMetric sets the beacon node and execution engine version gauges. +func consensusAndExecutionVersionMetric(ctx context.Context, eth2Cl eth2wrap.Client, beaconNodeAddrs []string, eth1Cl eth1wrap.EthClientRunner, clk clockwork.Clock) { nodeVersionTicker := clk.NewTicker(10 * time.Minute) setNodeVersionAndID := func() { beaconNodeVersionGauge.Reset() + executionEngineVersionGauge.Reset() // Query each beacon node individually for _, addr := range beaconNodeAddrs { @@ -287,6 +289,17 @@ func beaconNodeVersionMetric(ctx context.Context, eth2Cl eth2wrap.Client, beacon eth2wrap.CheckBeaconNodeVersion(ctx, version) } + + // Query the execution engine version + elVersion, err := eth1Cl.ClientVersion(ctx) + if errors.Is(err, eth1wrap.ErrNoExecutionEngineAddr) { //nolint:revive + // No execution engine configured, skip. 
+ } else if err != nil { + log.Warn(ctx, "Failed to fetch execution engine version", err) + } else { + executionEngineVersionGauge.WithLabelValues(elVersion).Set(1) + eth1wrap.CheckExecutionEngineVersion(ctx, elVersion) + } } go func() { diff --git a/app/monitoringapi_internal_test.go b/app/monitoringapi_internal_test.go index f12165f8a0..5297a85ede 100644 --- a/app/monitoringapi_internal_test.go +++ b/app/monitoringapi_internal_test.go @@ -12,9 +12,12 @@ import ( "github.com/jonboulle/clockwork" "github.com/libp2p/go-libp2p/core/host" "github.com/libp2p/go-libp2p/core/peer" + "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" "github.com/obolnetwork/charon/app/errors" + "github.com/obolnetwork/charon/app/eth1wrap" + eth1wrapmocks "github.com/obolnetwork/charon/app/eth1wrap/mocks" "github.com/obolnetwork/charon/testutil" "github.com/obolnetwork/charon/testutil/beaconmock" ) @@ -167,6 +170,96 @@ func TestStartChecker(t *testing.T) { } } +func TestConsensusAndExecutionVersionMetric(t *testing.T) { + tests := []struct { + name string + beaconAddrs []string + nodeVersionErr error + elVersion string + elErr error + wantNodeVersionCalls int + }{ + { + name: "success single beacon node with el", + beaconAddrs: []string{"http://beacon1:5052"}, + elVersion: "Geth/v1.16.7-stable/linux-amd64/go1.22.0", + wantNodeVersionCalls: 1, + }, + { + name: "success multiple beacon nodes with el", + beaconAddrs: []string{"http://beacon1:5052", "http://beacon2:5052"}, + elVersion: "Geth/v1.16.7-stable/linux-amd64/go1.22.0", + wantNodeVersionCalls: 2, + }, + { + name: "beacon node version error skips that node", + beaconAddrs: []string{"http://beacon1:5052"}, + nodeVersionErr: errors.New("connection refused"), + wantNodeVersionCalls: 1, + }, + { + name: "no beacon nodes still queries el", + beaconAddrs: []string{}, + elVersion: "Geth/v1.16.7-stable/linux-amd64/go1.22.0", + }, + { + name: "el error no addr silently skipped", + beaconAddrs: 
[]string{"http://beacon1:5052"}, + elErr: eth1wrap.ErrNoExecutionEngineAddr, + wantNodeVersionCalls: 1, + }, + { + name: "el generic error does not panic", + beaconAddrs: []string{"http://beacon1:5052"}, + elErr: errors.New("rpc connection error"), + wantNodeVersionCalls: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx, cancel := context.WithCancel(t.Context()) + defer cancel() + + // done is closed by ClientVersion (always the last call in setNodeVersionAndID), + // providing a happens-before guarantee that nodeVersionCalls is safe to read after. + done := make(chan struct{}) + + var nodeVersionCalls int + + bmock, err := beaconmock.New(t.Context()) + require.NoError(t, err) + + bmock.NodeVersionFunc = func(_ context.Context, _ *eth2api.NodeVersionOpts) (*eth2api.Response[string], error) { + nodeVersionCalls++ + + if tt.nodeVersionErr != nil { + return nil, tt.nodeVersionErr + } + + return &eth2api.Response[string]{Data: "Lighthouse/v5.3.0-aa022f4/x86_64-linux"}, nil + } + + eth1Cl := eth1wrapmocks.NewEthClientRunner(t) + eth1Cl.On("ClientVersion", mock.Anything).Run(func(_ mock.Arguments) { + close(done) + }).Return(tt.elVersion, tt.elErr).Once() + + clock := clockwork.NewFakeClock() + + consensusAndExecutionVersionMetric(ctx, bmock, tt.beaconAddrs, eth1Cl, clock) + + select { + case <-done: + case <-time.After(time.Second): + t.Fatal("timed out waiting for ClientVersion call") + } + + require.Equal(t, tt.wantNodeVersionCalls, nodeVersionCalls) + }) + } +} + func advanceClock(t *testing.T, ctx context.Context, clock *clockwork.FakeClock, duration time.Duration) { t.Helper() diff --git a/docs/metrics.md b/docs/metrics.md index a2e3589d2b..0b44c1537f 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -28,6 +28,7 @@ when storing metrics from multiple nodes or clusters in one Prometheus instance.
| `app_eth2_latency_seconds` | Histogram | Latency in seconds for eth2 beacon node requests | `endpoint` | | `app_eth2_requests_total` | Counter | Total number of requests sent to eth2 beacon node | `endpoint` | | `app_eth2_using_fallback` | Gauge | Indicates if client is using fallback (1) or primary (0) beacon node | | +| `app_execution_layer_version` | Gauge | Constant gauge with labels set to the version of the upstream execution layer | `version` | | `app_feature_flags` | Gauge | Constant gauge with custom enabled feature flags | `feature_flags` | | `app_git_commit` | Gauge | Constant gauge with label set to current git commit hash | `git_hash` | | `app_health_checks` | Gauge | Application health checks by name and severity. Set to 1 for failing, 0 for ok. | `severity, name` |