Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/cmd/openshift-tests/run-upgrade/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ func (o *RunUpgradeSuiteOptions) Run(ctx context.Context) error {
UpgradeTargetPayloadImagePullSpec: last,
ExactMonitorTests: o.GinkgoRunSuiteOptions.ExactMonitorTests,
DisableMonitorTests: o.GinkgoRunSuiteOptions.DisableMonitorTests,
SuiteName: o.Suite.Name,
}

o.GinkgoRunSuiteOptions.CommandEnv = o.TestCommandEnvironment()
Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/openshift-tests/run/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ func (o *RunSuiteOptions) Run(ctx context.Context) error {
ClusterStabilityDuringTest: monitortestframework.ClusterStabilityDuringTest(stabilitySetting),
ExactMonitorTests: o.GinkgoRunSuiteOptions.ExactMonitorTests,
DisableMonitorTests: o.GinkgoRunSuiteOptions.DisableMonitorTests,
SuiteName: o.Suite.Name,
}

o.GinkgoRunSuiteOptions.CommandEnv = o.TestCommandEnvironment()
Expand Down
2 changes: 2 additions & 0 deletions pkg/defaultmonitortests/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import (
"github.com/openshift/origin/pkg/monitortests/testframework/additionaleventscollector"
"github.com/openshift/origin/pkg/monitortests/testframework/alertanalyzer"
"github.com/openshift/origin/pkg/monitortests/testframework/clusterinfoserializer"
"github.com/openshift/origin/pkg/monitortests/testframework/clusterinstancetypes"
"github.com/openshift/origin/pkg/monitortests/testframework/cpumetriccollector"
"github.com/openshift/origin/pkg/monitortests/testframework/disruptionexternalawscloudservicemonitoring"
"github.com/openshift/origin/pkg/monitortests/testframework/disruptionexternalazurecloudservicemonitoring"
Expand Down Expand Up @@ -208,6 +209,7 @@ func newUniversalMonitorTests(info monitortestframework.MonitorTestInitializatio
monitorTestRegistry.AddMonitorTestOrDie("interval-serializer", "Test Framework", intervalserializer.NewIntervalSerializer())
monitorTestRegistry.AddMonitorTestOrDie("tracked-resources-serializer", "Test Framework", trackedresourcesserializer.NewTrackedResourcesSerializer())
monitorTestRegistry.AddMonitorTestOrDie("cluster-info-serializer", "Test Framework", clusterinfoserializer.NewClusterInfoSerializer())
monitorTestRegistry.AddMonitorTestOrDie("cluster-instance-types", "Test Framework", clusterinstancetypes.NewClusterInstanceTypes(info))
monitorTestRegistry.AddMonitorTestOrDie("additional-events-collector", "Test Framework", additionaleventscollector.NewIntervalSerializer())
monitorTestRegistry.AddMonitorTestOrDie("known-image-checker", "Test Framework", knownimagechecker.NewEnsureValidImages())
monitorTestRegistry.AddMonitorTestOrDie("e2e-test-analyzer", "Test Framework", e2etestanalyzer.NewAnalyzer())
Expand Down
3 changes: 3 additions & 0 deletions pkg/monitortestframework/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ type MonitorTestInitializationInfo struct {

// DisableMonitorTests will remove any monitor tests contained in the provided list
DisableMonitorTests []string

// SuiteName is the name of the test suite being run (e.g. "kubernetes/conformance", "openshift/conformance/parallel").
SuiteName string
}

type OpenshiftTestImageGetterFunc func(ctx context.Context, adminRESTConfig *rest.Config) (imagePullSpec string, notSupportedReason string, err error)
Expand Down
200 changes: 200 additions & 0 deletions pkg/monitortests/testframework/clusterinstancetypes/monitortest.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
package clusterinstancetypes

import (
"context"
"fmt"
"path/filepath"
"sort"
"strings"
"time"

configclient "github.com/openshift/client-go/config/clientset/versioned"
"github.com/openshift/origin/pkg/dataloader"
"github.com/openshift/origin/pkg/monitor/monitorapi"
"github.com/openshift/origin/pkg/monitortestframework"
"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
)

// clusterInstanceTypes is a monitor test that records which instance types
// back the cluster's nodes and writes them out as a data file.
type clusterInstanceTypes struct {
// adminRESTConfig is stored by StartCollection and used by collect to build
// the config and kube API clients.
adminRESTConfig *rest.Config
// suiteName is copied from MonitorTestInitializationInfo.SuiteName and
// stamped on every collected row.
suiteName string
// data holds the rows gathered by CollectData; WriteContentToStorage
// serializes them and writes nothing when it is empty.
data []instanceTypeRow
}

type instanceTypeRow struct {
Platform string `json:"platform"`
Region string `json:"region"`
Zone string `json:"zone"`
Role string `json:"role"`
InstanceType string `json:"instance_type"`

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On AWS, we also support Local Zones and Wavelength Zones via the edge compute pool.

Let's also collect zone details (for AWS only?). We can get it by inspecting node label topology.kubernetes.io/zone, for example, in this run:

topology.kubernetes.io/zone: us-west-2-pdx-1a

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this not reflected in the region? Are these zones meaningful consistent names for customers vs those that jump around with different names per account? Will all masters/workers be in the same zone in this case?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this not reflected in the region?

Oh, not at all, for example, region us-west-2 only reflects regular AZs (e.g. us-west-2a, us-west-2b). Local/Wavelength zones are special AZs that require additional day-0 configuration via edge compute pool (see EP); thus, dedicated jobs:

  • periodic-ci-openshift-verification-tests-main-installation-nightly-5.0-aws-ipi-all-localzones-f1
  • periodic-ci-openshift-verification-tests-main-installation-nightly-5.0-aws-ipi-all-wavelength-zones-f1

Are these zones meaningful consistent names for customers vs those that jump around with different names per account?

AFAIK, local and wavelength zones are not subject to logical zone mappings between accounts. These zones are limited and point to deterministic physical locations (local zones and wavelength zones). Besides, according to the AWS docs, logical mapping only applies to older regions and to accounts created before Nov 2025.

Will all masters/workers be in the same zone in this case?

oh, no. Local and Wavelength zones only apply to worker nodes. AFAIK, the 2 jobs above will find all available local/wavelength zones and put 1 worker replica in each. For example, this job defines 20 workers for 20 local zones.

Suite string `json:"suite"`
}

// NewClusterInstanceTypes builds the cluster-instance-types monitor test.
// Only info.SuiteName is retained; it is later attached to every collected row.
func NewClusterInstanceTypes(info monitortestframework.MonitorTestInitializationInfo) monitortestframework.MonitorTest {
	collector := new(clusterInstanceTypes)
	collector.suiteName = info.SuiteName
	return collector
}

// PrepareCollection is a no-op for this monitor test; it always returns nil.
func (w *clusterInstanceTypes) PrepareCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
return nil
}

// StartCollection remembers adminRESTConfig so that collect can build API
// clients from it later. No collection work happens here; it always returns nil.
func (w *clusterInstanceTypes) StartCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
w.adminRESTConfig = adminRESTConfig
return nil
}

func (w *clusterInstanceTypes) CollectData(ctx context.Context, storageDir string, beginning, end time.Time) (monitorapi.Intervals, []*junitapi.JUnitTestCase, error) {
logger := logrus.WithField("MonitorTest", "ClusterInstanceTypes")

data, err := w.collect(ctx)
if err != nil {
logger.WithError(err).Warn("failed to collect instance type data")
return nil, nil, nil
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
w.data = data
return nil, nil, nil
}

// ConstructComputedIntervals is a no-op: this monitor test derives no
// intervals and always returns (nil, nil).
func (*clusterInstanceTypes) ConstructComputedIntervals(ctx context.Context, startingIntervals monitorapi.Intervals, recordedResources monitorapi.ResourcesMap, beginning, end time.Time) (monitorapi.Intervals, error) {
return nil, nil
}

// EvaluateTestsFromConstructedIntervals is a no-op: this monitor test emits
// no junit test cases and always returns (nil, nil).
func (*clusterInstanceTypes) EvaluateTestsFromConstructedIntervals(ctx context.Context, finalIntervals monitorapi.Intervals) ([]*junitapi.JUnitTestCase, error) {
return nil, nil
}

// WriteContentToStorage serializes the rows cached by CollectData into a
// dataloader.DataFile for the "cluster_instance_types" table and writes it
// under storageDir. The file name embeds timeSuffix and ends with
// dataloader.AutoDataLoaderSuffix.
//
// Nothing is written when no rows were collected. A write failure is returned
// wrapped with context.
func (w *clusterInstanceTypes) WriteContentToStorage(ctx context.Context, storageDir, timeSuffix string, finalIntervals monitorapi.Intervals, finalResourceState monitorapi.ResourcesMap) error {
	if len(w.data) == 0 {
		return nil
	}

	// Every column is emitted as a string.
	schema := map[string]dataloader.DataType{
		"Platform":     dataloader.DataTypeString,
		"Region":       dataloader.DataTypeString,
		"Zone":         dataloader.DataTypeString,
		"Role":         dataloader.DataTypeString,
		"InstanceType": dataloader.DataTypeString,
		"Suite":        dataloader.DataTypeString,
	}

	records := make([]map[string]string, len(w.data))
	for i := range w.data {
		entry := &w.data[i]
		records[i] = map[string]string{
			"Platform":     entry.Platform,
			"Region":       entry.Region,
			"Zone":         entry.Zone,
			"Role":         entry.Role,
			"InstanceType": entry.InstanceType,
			"Suite":        entry.Suite,
		}
	}

	baseName := fmt.Sprintf("cluster-instance-types%s-%s", timeSuffix, dataloader.AutoDataLoaderSuffix)
	target := filepath.Join(storageDir, baseName)

	err := dataloader.WriteDataFile(target, dataloader.DataFile{
		TableName: "cluster_instance_types",
		Schema:    schema,
		Rows:      records,
	})
	if err != nil {
		return fmt.Errorf("failed to write instance types autodl: %w", err)
	}

	return nil
}

// Cleanup is a no-op: this monitor test has nothing to release and always
// returns nil.
func (*clusterInstanceTypes) Cleanup(ctx context.Context) error {
return nil
}

func (w *clusterInstanceTypes) collect(ctx context.Context) ([]instanceTypeRow, error) {
configClient, err := configclient.NewForConfig(w.adminRESTConfig)
if err != nil {
return nil, fmt.Errorf("failed to create config client: %w", err)
}

infra, err := configClient.ConfigV1().Infrastructures().Get(ctx, "cluster", metav1.GetOptions{})
if err != nil {
return nil, fmt.Errorf("failed to get infrastructure: %w", err)
}

if infra.Status.PlatformStatus == nil {
logrus.Info("skipping instance type collection: platform status not set")
return nil, nil
}

platform := strings.ToLower(string(infra.Status.PlatformStatus.Type))
if platform != "aws" && platform != "azure" && platform != "gcp" {
logrus.WithField("platform", platform).Info("skipping instance type collection for unsupported platform")
return nil, nil
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

kubeClient, err := kubernetes.NewForConfig(w.adminRESTConfig)
if err != nil {
return nil, fmt.Errorf("failed to create kube client: %w", err)
}

nodes, err := kubeClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
if err != nil {
return nil, fmt.Errorf("failed to list nodes: %w", err)
}

return buildRows(platform, w.suiteName, nodes.Items), nil
}

// buildRows converts nodes into de-duplicated instance type rows.
//
// Nodes without a "node.kubernetes.io/instance-type" label are skipped. One
// row is kept per distinct (role, instance type, zone) combination; region is
// taken from the first node seen for that combination. platform and suite are
// copied verbatim onto every row.
//
// The result is sorted by Role, then InstanceType, then Zone. Because that
// triple is unique per row, the order is fully determined by the row contents
// and does not depend on the order of nodes.
func buildRows(platform, suite string, nodes []corev1.Node) []instanceTypeRow {
	// A struct key avoids any ambiguity a separator-joined string could have.
	type rowKey struct {
		role, instanceType, zone string
	}

	seen := make(map[rowKey]struct{}, len(nodes))
	var result []instanceTypeRow

	for i := range nodes {
		labels := nodes[i].Labels

		instanceType := labels["node.kubernetes.io/instance-type"]
		if instanceType == "" {
			continue
		}

		zone := labels["topology.kubernetes.io/zone"]
		role := nodeRole(labels)

		key := rowKey{role: role, instanceType: instanceType, zone: zone}
		if _, dup := seen[key]; dup {
			continue
		}
		seen[key] = struct{}{}

		result = append(result, instanceTypeRow{
			Platform:     platform,
			Region:       labels["topology.kubernetes.io/region"],
			Zone:         zone,
			Role:         role,
			InstanceType: instanceType,
			Suite:        suite,
		})
	}

	sort.Slice(result, func(i, j int) bool {
		a, b := result[i], result[j]
		if a.Role != b.Role {
			return a.Role < b.Role
		}
		if a.InstanceType != b.InstanceType {
			return a.InstanceType < b.InstanceType
		}
		// Zone breaks ties so rows differing only by zone have a stable order.
		return a.Zone < b.Zone
	})

	return result
}

// nodeRole classifies a node from its labels: it reports "control-plane" when
// either the master or the control-plane node-role label key is present
// (regardless of the label's value), and "worker" otherwise.
func nodeRole(labels map[string]string) string {
	controlPlaneKeys := [...]string{
		"node-role.kubernetes.io/master",
		"node-role.kubernetes.io/control-plane",
	}
	for _, key := range controlPlaneKeys {
		if _, present := labels[key]; present {
			return "control-plane"
		}
	}
	return "worker"
}
Loading