Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/clusterstats/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ go_library(
go_test(
name = "clusterstats_test",
srcs = [
"collector_test.go",
"exporter_test.go",
"streamer_test.go",
":mock_client", # keep
Expand Down
44 changes: 40 additions & 4 deletions pkg/cmd/roachtest/clusterstats/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package clusterstats

import (
"context"
"strings"
"time"

"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
Expand Down Expand Up @@ -95,8 +96,8 @@ func (cs *clusterStatCollector) CollectPoint(
if err != nil {
return nil, err
}
if len(warnings) > 0 {
return nil, errors.Newf("found warnings querying prometheus: %s", warnings)
if err := handlePromWarnings(ctx, l, q, warnings); err != nil {
return nil, err
}

fromVec := fromVal.(model.Vector)
Expand Down Expand Up @@ -169,8 +170,8 @@ func (cs *clusterStatCollector) CollectInterval(
if err != nil {
return nil, err
}
if len(warnings) > 0 {
return nil, errors.Newf("found warnings querying prometheus: %s", warnings)
if err := handlePromWarnings(ctx, l, q, warnings); err != nil {
return nil, err
}

fromMatrixTagged := fromVal.(model.Matrix)
Expand Down Expand Up @@ -210,3 +211,38 @@ func (cs *clusterStatCollector) CollectInterval(

return result, nil
}

// handlePromWarnings triages the annotations that Prometheus attaches to a
// query result and decides whether the query should be considered failed.
//
// Annotations are distinguished by their textual prefix:
//
//   - Entries beginning with "PromQL info:" are advisory only (for instance
//     the counter-naming lint emitted since Prometheus 2.53). The result is
//     still usable, so each one is written to l together with the query q and
//     otherwise ignored.
//   - Everything else is considered fatal. That covers "PromQL warning:"
//     annotations, which flag a probable problem with the query (e.g.
//     mismatched histogram operations) where Prometheus may have dropped
//     result elements, as well as any annotation without a recognized prefix
//     (e.g. legacy remote-read warnings that predate the PromQL annotation
//     system). Failing on the latter keeps the previous fail-loud behavior.
//
// It returns nil when there are no fatal annotations. Otherwise it returns an
// error listing only the fatal ones, formatted exactly like the error that was
// produced historically so that callers and log scrapers see the same string.
func handlePromWarnings(
	ctx context.Context, l *logger.Logger, q string, warnings promv1.Warnings,
) error {
	// Prefix that marks an annotation as purely informational.
	const infoPrefix = "PromQL info:"

	// An empty or nil input simply falls through the loop and yields nil.
	var fatal promv1.Warnings
	for _, annotation := range warnings {
		if !strings.HasPrefix(annotation, infoPrefix) {
			fatal = append(fatal, annotation)
			continue
		}
		// Informational only: surface it in the log and keep going.
		l.PrintfCtx(ctx, "prometheus info querying %q: %s", q, annotation)
	}

	if len(fatal) > 0 {
		return errors.Newf("found warnings querying prometheus: %s", fatal)
	}
	return nil
}
68 changes: 68 additions & 0 deletions pkg/cmd/roachtest/clusterstats/collector_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Copyright 2026 The Cockroach Authors.
//
// Use of this software is governed by the CockroachDB Software License
// included in the /LICENSE file.

package clusterstats

import (
"context"
"testing"

"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/stretchr/testify/require"
)

func TestHandlePromWarnings(t *testing.T) {
const (
// Sample annotations as emitted by Prometheus 2.53+.
infoNotCounter = `PromQL info: metric might not be a counter, name does not end in _total/_sum/_count/_bucket: "sys_host_disk_write_bytes" (1:6)`
warnGaugeHist = `PromQL warning: rate() applied to gauge histogram has undefined semantics`
legacyRemoteRead = `remote read failed: partial response`
)

tests := []struct {
name string
warnings promv1.Warnings
expectedErr string
}{
{
name: "no warnings",
warnings: nil,
},
{
name: "only info is silenced",
warnings: promv1.Warnings{infoNotCounter},
},
{
name: "warning is still fatal",
warnings: promv1.Warnings{warnGaugeHist},
expectedErr: "found warnings querying prometheus: [" + warnGaugeHist + "]",
},
{
name: "unprefixed annotation is treated as warning",
warnings: promv1.Warnings{legacyRemoteRead},
expectedErr: "found warnings querying prometheus: [" + legacyRemoteRead + "]",
},
{
name: "info filtered out, warning preserved",
warnings: promv1.Warnings{infoNotCounter, warnGaugeHist},
expectedErr: "found warnings querying prometheus: [" + warnGaugeHist + "]",
},
}

l, err := (&logger.Config{}).NewLogger("")
require.NoError(t, err)

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
err := handlePromWarnings(context.Background(), l, "q", tc.warnings)
if tc.expectedErr == "" {
require.NoError(t, err)
} else {
require.EqualError(t, err, tc.expectedErr)
}
})
}
}
Loading