From 4ab66bce4191cc0900e2b458be7e7f307d99d6f0 Mon Sep 17 00:00:00 2001
From: Jamo Luhrsen
Date: Mon, 16 Mar 2026 20:38:52 -0700
Subject: [PATCH] avoid reporting Progressing during node reboots

Only report Progressing when there's an actual network rollout
(Generation changed or initial deployment), not during routine node
maintenance when pods temporarily become unavailable.

Remove the blanket MCP progressing check that reported Progressing
whenever any pool was updating, regardless of whether CNO's configs
had already been applied.

Signed-off-by: Jamo Luhrsen
Co-authored-by: Claude Code
---
 .../statusmanager/machineconfig_status.go   | 18 ------------------
 pkg/controller/statusmanager/pod_status.go  | 17 ++++++++++++-----
 2 files changed, 12 insertions(+), 23 deletions(-)

diff --git a/pkg/controller/statusmanager/machineconfig_status.go b/pkg/controller/statusmanager/machineconfig_status.go
index adc6e13495..e026d4e759 100644
--- a/pkg/controller/statusmanager/machineconfig_status.go
+++ b/pkg/controller/statusmanager/machineconfig_status.go
@@ -140,18 +140,11 @@ func (status *StatusManager) SetFromMachineConfigPool(mcPools []mcfgv1.MachineCo
 	// No degraded pools, so clear degraded status
 	status.setNotDegraded(MachineConfig)
 
-	// Now check for progressing and process machine configs
 	for role, machineConfigs := range status.renderedMachineConfigs {
 		pools, err := status.findMachineConfigPoolsForLabel(mcPools, map[string]string{names.MachineConfigLabelRoleKey: role})
 		if err != nil {
 			klog.Errorf("failed to get machine config pools for the role %s: %v", role, err)
 		}
-
-		progressingPool := status.isAnyMachineConfigPoolProgressing(pools)
-		if progressingPool != "" {
-			status.setProgressing(MachineConfig, "MachineConfig", fmt.Sprintf("%s machine config pool in progressing state", progressingPool))
-			return nil
-		}
 		for _, pool := range pools {
 			if pool.Spec.Paused {
 				// When a machine config pool is in paused state, then it is expected that mco doesn't process any machine configs for the pool.
@@ -250,17 +243,6 @@ func (status *StatusManager) isAnyMachineConfigPoolDegraded(pools []mcfgv1.Machi
 	return degradedPool
 }
 
-func (status *StatusManager) isAnyMachineConfigPoolProgressing(pools []mcfgv1.MachineConfigPool) string {
-	var progressingPool string
-	for _, pool := range pools {
-		if mcomcfgv1.IsMachineConfigPoolConditionTrue(pool.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) {
-			progressingPool = pool.Name
-			break
-		}
-	}
-	return progressingPool
-}
-
 func (status *StatusManager) findMachineConfigPoolsForLabel(mcPools []mcfgv1.MachineConfigPool, mcLabel labels.Set) ([]mcfgv1.MachineConfigPool, error) {
 	var mcps []mcfgv1.MachineConfigPool
 	for _, mcPool := range mcPools {
diff --git a/pkg/controller/statusmanager/pod_status.go b/pkg/controller/statusmanager/pod_status.go
index d548cd4ae1..3714602b7d 100644
--- a/pkg/controller/statusmanager/pod_status.go
+++ b/pkg/controller/statusmanager/pod_status.go
@@ -96,7 +96,8 @@ func (status *StatusManager) SetFromPods() {
 		} else if ds.Status.UpdatedNumberScheduled < ds.Status.DesiredNumberScheduled {
 			progressing = append(progressing, fmt.Sprintf("DaemonSet %q update is rolling out (%d out of %d updated)", dsName.String(), ds.Status.UpdatedNumberScheduled, ds.Status.DesiredNumberScheduled))
 			dsProgressing = true
-		} else if ds.Status.NumberUnavailable > 0 {
+		} else if ds.Status.NumberUnavailable > 0 && (ds.Status.DesiredNumberScheduled == 0 || ds.Generation > ds.Status.ObservedGeneration) {
+			// Report Progressing only during initial deployment (DesiredNumberScheduled not set) or active rollout (spec changed)
 			progressing = append(progressing, fmt.Sprintf("DaemonSet %q is not available (awaiting %d nodes)", dsName.String(), ds.Status.NumberUnavailable))
 			dsProgressing = true
 			// Check for any pods in CrashLoopBackOff state and mark the operator as degraded if so.
@@ -153,8 +154,11 @@ func (status *StatusManager) SetFromPods() {
 			progressing = append(progressing, fmt.Sprintf("StatefulSet %q update is rolling out (%d out of %d updated)", ssName.String(), ss.Status.UpdatedReplicas, ss.Status.Replicas))
 			ssProgressing = true
 		} else if ss.Status.ReadyReplicas > 0 && ss.Status.ReadyReplicas < ss.Status.Replicas {
-			progressing = append(progressing, fmt.Sprintf("StatefulSet %q is not available (awaiting %d nodes)", ssName.String(), (ss.Status.Replicas-ss.Status.ReadyReplicas)))
-			ssProgressing = true
+			if ss.Generation == 0 || ss.Status.ObservedGeneration < ss.Generation {
+				// Report Progressing during initial deployment or active rollout (spec changed)
+				progressing = append(progressing, fmt.Sprintf("StatefulSet %q is not available (awaiting %d nodes)", ssName.String(), (ss.Status.Replicas-ss.Status.ReadyReplicas)))
+				ssProgressing = true
+			}
 			// Check for any pods in CrashLoopBackOff state and mark the operator as degraded if so.
 			if !isNonCritical(ss) {
 				hung = append(hung, status.CheckCrashLoopBackOffPods(ssName, ss.Spec.Selector.MatchLabels, "StatefulSet")...)
@@ -208,8 +212,11 @@ func (status *StatusManager) SetFromPods() {
 			progressing = append(progressing, fmt.Sprintf("Deployment %q update is rolling out (%d out of %d updated)", depName.String(), dep.Status.UpdatedReplicas, dep.Status.Replicas))
 			depProgressing = true
 		} else if dep.Status.UnavailableReplicas > 0 {
-			progressing = append(progressing, fmt.Sprintf("Deployment %q is not available (awaiting %d nodes)", depName.String(), dep.Status.UnavailableReplicas))
-			depProgressing = true
+			if dep.Generation == 0 || dep.Status.ObservedGeneration < dep.Generation {
+				// Report Progressing during initial deployment or active rollout (spec changed)
+				progressing = append(progressing, fmt.Sprintf("Deployment %q is not available (awaiting %d nodes)", depName.String(), dep.Status.UnavailableReplicas))
+				depProgressing = true
+			}
 			// Check for any pods in CrashLoopBackOff state and mark the operator as degraded if so.
 			if !isNonCritical(dep) {
 				hung = append(hung, status.CheckCrashLoopBackOffPods(depName, dep.Spec.Selector.MatchLabels, "Deployment")...)
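
Reviewer note (not part of the patch): every guard added above relies on the
same generation bookkeeping. The API server bumps .metadata.generation when a
workload's spec changes, and the workload controller copies that value into
.status.observedGeneration once it has acted on the new spec, so
Generation > Status.ObservedGeneration means a rollout is still in flight,
while equal values with unavailable pods point at node-level disruption.
Below is a minimal standalone sketch of the DaemonSet predicate; the helper
name isRolloutInProgress and the hand-built objects are illustrative only:

	package main

	import (
		"fmt"

		appsv1 "k8s.io/api/apps/v1"
	)

	// isRolloutInProgress mirrors the patched DaemonSet condition: unavailable
	// pods count as Progressing only during initial deployment (nothing
	// scheduled yet) or while an unobserved spec change is rolling out.
	func isRolloutInProgress(ds *appsv1.DaemonSet) bool {
		return ds.Status.NumberUnavailable > 0 &&
			(ds.Status.DesiredNumberScheduled == 0 || ds.Generation > ds.Status.ObservedGeneration)
	}

	func main() {
		// Node reboot: spec already observed, one pod temporarily down.
		rebooting := &appsv1.DaemonSet{}
		rebooting.Generation = 3
		rebooting.Status.ObservedGeneration = 3
		rebooting.Status.DesiredNumberScheduled = 6
		rebooting.Status.NumberUnavailable = 1
		fmt.Println(isRolloutInProgress(rebooting)) // false: quiet during maintenance

		// Rollout: spec changed, controller has not observed it yet.
		rolling := &appsv1.DaemonSet{}
		rolling.Generation = 4
		rolling.Status.ObservedGeneration = 3
		rolling.Status.DesiredNumberScheduled = 6
		rolling.Status.NumberUnavailable = 1
		fmt.Println(isRolloutInProgress(rolling)) // true: report Progressing
	}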