18 changes: 0 additions & 18 deletions pkg/controller/statusmanager/machineconfig_status.go
@@ -140,18 +140,11 @@ func (status *StatusManager) SetFromMachineConfigPool(mcPools []mcfgv1.MachineConfigPool) error {
     // No degraded pools, so clear degraded status
     status.setNotDegraded(MachineConfig)
 
-    // Now check for progressing and process machine configs
     for role, machineConfigs := range status.renderedMachineConfigs {
         pools, err := status.findMachineConfigPoolsForLabel(mcPools, map[string]string{names.MachineConfigLabelRoleKey: role})
         if err != nil {
             klog.Errorf("failed to get machine config pools for the role %s: %v", role, err)
         }
-
-        progressingPool := status.isAnyMachineConfigPoolProgressing(pools)
-        if progressingPool != "" {
-            status.setProgressing(MachineConfig, "MachineConfig", fmt.Sprintf("%s machine config pool in progressing state", progressingPool))
-            return nil
-        }
         for _, pool := range pools {
             if pool.Spec.Paused {
                 // When a machine config pool is in paused state, then it is expected that mco doesn't process any machine configs for the pool.
@@ -250,17 +243,6 @@ func (status *StatusManager) isAnyMachineConfigPoolDegraded(pools []mcfgv1.MachineConfigPool) string {
     return degradedPool
 }
 
-func (status *StatusManager) isAnyMachineConfigPoolProgressing(pools []mcfgv1.MachineConfigPool) string {
-    var progressingPool string
-    for _, pool := range pools {
-        if mcomcfgv1.IsMachineConfigPoolConditionTrue(pool.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) {
-            progressingPool = pool.Name
-            break
-        }
-    }
-    return progressingPool
-}
-
 func (status *StatusManager) findMachineConfigPoolsForLabel(mcPools []mcfgv1.MachineConfigPool, mcLabel labels.Set) ([]mcfgv1.MachineConfigPool, error) {
     var mcps []mcfgv1.MachineConfigPool
     for _, mcPool := range mcPools {
17 changes: 12 additions & 5 deletions pkg/controller/statusmanager/pod_status.go
@@ -96,7 +96,8 @@ func (status *StatusManager) SetFromPods() {
     } else if ds.Status.UpdatedNumberScheduled < ds.Status.DesiredNumberScheduled {
         progressing = append(progressing, fmt.Sprintf("DaemonSet %q update is rolling out (%d out of %d updated)", dsName.String(), ds.Status.UpdatedNumberScheduled, ds.Status.DesiredNumberScheduled))
         dsProgressing = true
-    } else if ds.Status.NumberUnavailable > 0 {
+    } else if ds.Status.NumberUnavailable > 0 && (ds.Status.DesiredNumberScheduled == 0 || ds.Generation > ds.Status.ObservedGeneration) {
+        // Report Progressing only during initial deployment (DesiredNumberScheduled not set) or active rollout (spec changed)
         progressing = append(progressing, fmt.Sprintf("DaemonSet %q is not available (awaiting %d nodes)", dsName.String(), ds.Status.NumberUnavailable))
         dsProgressing = true
         // Check for any pods in CrashLoopBackOff state and mark the operator as degraded if so.
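Read in isolation, the new DaemonSet gate is easier to follow as a standalone predicate. The following is a minimal sketch, not code from the PR: the helper name dsNeedsProgressing is ours, and it assumes only the standard k8s.io/api/apps/v1 types.

package sketch

import appsv1 "k8s.io/api/apps/v1"

// dsNeedsProgressing restates the new condition: an unavailable DaemonSet
// only reports Progressing while it is first being scheduled (the
// controller has not yet filled in DesiredNumberScheduled) or while a
// spec change has not yet been observed (metadata.Generation is ahead of
// Status.ObservedGeneration). A steady-state DaemonSet with unavailable
// pods no longer pins the operator in Progressing; it falls through to
// the CrashLoopBackOff/degraded checks instead.
func dsNeedsProgressing(ds *appsv1.DaemonSet) bool {
    if ds.Status.NumberUnavailable == 0 {
        return false
    }
    return ds.Status.DesiredNumberScheduled == 0 ||
        ds.Generation > ds.Status.ObservedGeneration
}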
@@ -153,8 +154,11 @@ func (status *StatusManager) SetFromPods() {
progressing = append(progressing, fmt.Sprintf("StatefulSet %q update is rolling out (%d out of %d updated)", ssName.String(), ss.Status.UpdatedReplicas, ss.Status.Replicas))
ssProgressing = true
} else if ss.Status.ReadyReplicas > 0 && ss.Status.ReadyReplicas < ss.Status.Replicas {
progressing = append(progressing, fmt.Sprintf("StatefulSet %q is not available (awaiting %d nodes)", ssName.String(), (ss.Status.Replicas-ss.Status.ReadyReplicas)))
ssProgressing = true
if ss.Generation == 0 || ss.Status.ObservedGeneration < ss.Generation {
Review comment (Contributor):

> ss.Generation == 0 is wrong... I assume this is supposed to be the equivalent of the ds.Status.NumberUnavailable check above

Reply (Contributor, Author):

> @danwinship, I think I ended up with this == 0 stuff mostly to make the tests pass. I took a deeper look at this with codex and we came up with this, which should be a little more comprehensive. It also tries to cover the small window after the controller has observed a new generation but before the rollout is actually fully healthy. That changeset is a little bigger than this one, but hopefully addresses it more properly.

+            // Report Progressing during initial deployment or active rollout (spec changed)
+            progressing = append(progressing, fmt.Sprintf("StatefulSet %q is not available (awaiting %d nodes)", ssName.String(), (ss.Status.Replicas-ss.Status.ReadyReplicas)))
+            ssProgressing = true
+        }
         // Check for any pods in CrashLoopBackOff state and mark the operator as degraded if so.
         if !isNonCritical(ss) {
             hung = append(hung, status.CheckCrashLoopBackOffPods(ssName, ss.Spec.Selector.MatchLabels, "StatefulSet")...)
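The review thread above turns on the ss.Generation == 0 clause. As a hedged restatement (helper name ours again; the "more comprehensive" changeset the author mentions is not shown in this diff, so this only captures the condition as written here): the API server typically sets metadata.generation to 1 when it creates a StatefulSet, so Generation == 0 is effectively only true for bare objects constructed in unit tests, which is the reviewer's point. The half of the condition that matters on a live cluster is the ObservedGeneration comparison.

package sketch

import appsv1 "k8s.io/api/apps/v1"

// ssNeedsProgressing restates the StatefulSet gate as merged. On a live
// cluster, Generation == 0 is essentially unreachable (generation starts
// at 1 on create); the meaningful clause is ObservedGeneration <
// Generation, meaning the controller has not yet acknowledged the latest
// spec and a rollout is (about to be) in flight.
func ssNeedsProgressing(ss *appsv1.StatefulSet) bool {
    if ss.Status.ReadyReplicas == 0 || ss.Status.ReadyReplicas >= ss.Status.Replicas {
        return false
    }
    return ss.Generation == 0 || ss.Status.ObservedGeneration < ss.Generation
}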
@@ -208,8 +212,11 @@ func (status *StatusManager) SetFromPods() {
progressing = append(progressing, fmt.Sprintf("Deployment %q update is rolling out (%d out of %d updated)", depName.String(), dep.Status.UpdatedReplicas, dep.Status.Replicas))
depProgressing = true
} else if dep.Status.UnavailableReplicas > 0 {
progressing = append(progressing, fmt.Sprintf("Deployment %q is not available (awaiting %d nodes)", depName.String(), dep.Status.UnavailableReplicas))
depProgressing = true
if dep.Generation == 0 || dep.Status.ObservedGeneration < dep.Generation {
Review comment (Contributor):

> likewise

+            // Report Progressing during initial deployment or active rollout (spec changed)
+            progressing = append(progressing, fmt.Sprintf("Deployment %q is not available (awaiting %d nodes)", depName.String(), dep.Status.UnavailableReplicas))
+            depProgressing = true
+        }
         // Check for any pods in CrashLoopBackOff state and mark the operator as degraded if so.
         if !isNonCritical(dep) {
             hung = append(hung, status.CheckCrashLoopBackOffPods(depName, dep.Spec.Selector.MatchLabels, "Deployment")...)
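To see why the ObservedGeneration gate matters in practice, here is a self-contained, test-style sketch for the Deployment case. The cases and helper are ours, not the PR's test code; it assumes the standard k8s.io/api/apps/v1 and k8s.io/apimachinery types.

package main

import (
    "fmt"

    appsv1 "k8s.io/api/apps/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// depNeedsProgressing is the same hypothetical restatement as above,
// applied to Deployments.
func depNeedsProgressing(dep *appsv1.Deployment) bool {
    if dep.Status.UnavailableReplicas == 0 {
        return false
    }
    return dep.Generation == 0 || dep.Status.ObservedGeneration < dep.Generation
}

func main() {
    cases := []struct {
        name string
        dep  appsv1.Deployment
        want bool
    }{
        // Bare fixture: Generation was never set by an API server.
        {"initial deployment", appsv1.Deployment{
            Status: appsv1.DeploymentStatus{UnavailableReplicas: 2},
        }, true},
        // Spec changed (Generation bumped) but the controller has not
        // observed it yet: an active rollout.
        {"active rollout", appsv1.Deployment{
            ObjectMeta: metav1.ObjectMeta{Generation: 3},
            Status:     appsv1.DeploymentStatus{ObservedGeneration: 2, UnavailableReplicas: 1},
        }, true},
        // Steady state: the controller has observed the latest spec but
        // pods are unavailable for some other reason. Previously this
        // reported Progressing indefinitely; now it falls through to the
        // Degraded/hung checks.
        {"steady-state outage", appsv1.Deployment{
            ObjectMeta: metav1.ObjectMeta{Generation: 3},
            Status:     appsv1.DeploymentStatus{ObservedGeneration: 3, UnavailableReplicas: 1},
        }, false},
    }
    for _, c := range cases {
        fmt.Printf("%s: progressing=%v (want %v)\n", c.name, depNeedsProgressing(&c.dep), c.want)
    }
}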