diff --git a/pkg/controller/statusmanager/machineconfig_status.go b/pkg/controller/statusmanager/machineconfig_status.go index adc6e13495..a781b69775 100644 --- a/pkg/controller/statusmanager/machineconfig_status.go +++ b/pkg/controller/statusmanager/machineconfig_status.go @@ -140,55 +140,46 @@ func (status *StatusManager) SetFromMachineConfigPool(mcPools []mcfgv1.MachineCo // No degraded pools, so clear degraded status status.setNotDegraded(MachineConfig) - // Now check for progressing and process machine configs for role, machineConfigs := range status.renderedMachineConfigs { pools, err := status.findMachineConfigPoolsForLabel(mcPools, map[string]string{names.MachineConfigLabelRoleKey: role}) if err != nil { klog.Errorf("failed to get machine config pools for the role %s: %v", role, err) } - - progressingPool := status.isAnyMachineConfigPoolProgressing(pools) - if progressingPool != "" { - status.setProgressing(MachineConfig, "MachineConfig", fmt.Sprintf("%s machine config pool in progressing state", progressingPool)) - return nil - } - for _, pool := range pools { - if pool.Spec.Paused { - // When a machine config pool is in paused state, then it is expected that mco doesn't process any machine configs for the pool. - // so if we report network status as progressing state then it blocks networking upgrade until machine config pool is changed - // into unpaused state. so let's not consider the pool for reporting status. - continue + for _, machineConfig := range machineConfigs.UnsortedList() { + mcSet := sets.New[string](machineConfig) + beingRemoved := false + if mcsBeingRemoved, ok := status.machineConfigsBeingRemoved[role]; ok && mcsBeingRemoved.Has(machineConfig) { + beingRemoved = true } - for _, machineConfig := range machineConfigs.UnsortedList() { - added := true - removed := true - mcSet := sets.Set[string]{} - mcSet.Insert(machineConfig) - if mcsBeingRemoved, ok := status.machineConfigsBeingRemoved[role]; ok && mcsBeingRemoved.Has(machineConfig) { - removed = mcutil.AreMachineConfigsRemovedFromPool(pool.Status, mcSet) - if removed { - status.machineConfigsBeingRemoved[role].Delete(machineConfig) - // Delete map entry from status cache if role doesn't have machine configs. By deleting the entry, - // there won't be any unnecessary processing of pools in the reconcile loop when it's not dealing - // with network operator machine configs anymore. - if status.machineConfigsBeingRemoved[role].Len() == 0 { - delete(status.machineConfigsBeingRemoved, role) - } - status.renderedMachineConfigs[role].Delete(machineConfig) - if status.renderedMachineConfigs[role].Len() == 0 { - delete(status.renderedMachineConfigs, role) - } - if err := status.setLastRenderedMachineConfigState(status.renderedMachineConfigs); err != nil { - return fmt.Errorf("failed to update rendered machine config state: %v", err) - } + + sawNonPausedPool := false + for _, pool := range pools { + if pool.Spec.Paused { + // When a machine config pool is in paused state, then it is expected that mco doesn't process any machine configs for the pool. + // so if we report network status as progressing state then it blocks networking upgrade until machine config pool is changed + // into unpaused state. so let's not consider the pool for reporting status. + continue + } + sawNonPausedPool = true + + if beingRemoved { + if mcutil.AreMachineConfigsRemovedFromPoolSource(pool.Status, mcSet) { + continue } - } else { - added = mcutil.AreMachineConfigsRenderedOnPool(pool.Status, mcSet) + } else if mcutil.AreMachineConfigsRenderedOnPoolSource(pool.Status, mcSet) { + continue } - if !added || !removed { - status.setProgressing(MachineConfig, "MachineConfig", - fmt.Sprintf("%s machine config pool is still processing %s machine config", pool.Name, machineConfig)) - return nil + + // Wait to prune cached removal state until every non-paused pool for + // this role reflects the updated rendered source. + status.setProgressing(MachineConfig, "MachineConfig", + fmt.Sprintf("%s machine config pool is still processing %s machine config", pool.Name, machineConfig)) + return nil + } + + if beingRemoved && sawNonPausedPool { + if err := status.forgetRemovedMachineConfig(role, machineConfig); err != nil { + return err } } } @@ -197,6 +188,24 @@ func (status *StatusManager) SetFromMachineConfigPool(mcPools []mcfgv1.MachineCo return nil } +func (status *StatusManager) forgetRemovedMachineConfig(role, machineConfig string) error { + status.machineConfigsBeingRemoved[role].Delete(machineConfig) + // Delete map entry from status cache if role doesn't have machine configs. By deleting the entry, + // there won't be any unnecessary processing of pools in the reconcile loop when it's not dealing + // with network operator machine configs anymore. + if status.machineConfigsBeingRemoved[role].Len() == 0 { + delete(status.machineConfigsBeingRemoved, role) + } + status.renderedMachineConfigs[role].Delete(machineConfig) + if status.renderedMachineConfigs[role].Len() == 0 { + delete(status.renderedMachineConfigs, role) + } + if err := status.setLastRenderedMachineConfigState(status.renderedMachineConfigs); err != nil { + return fmt.Errorf("failed to update rendered machine config state: %v", err) + } + return nil +} + func (status *StatusManager) getLastRenderedMachineConfigState() (map[string]sets.Set[string], error) { renderedMachineConfigs := map[string]sets.Set[string]{} co := &configv1.ClusterOperator{ObjectMeta: metav1.ObjectMeta{Name: status.name}} @@ -250,17 +259,6 @@ func (status *StatusManager) isAnyMachineConfigPoolDegraded(pools []mcfgv1.Machi return degradedPool } -func (status *StatusManager) isAnyMachineConfigPoolProgressing(pools []mcfgv1.MachineConfigPool) string { - var progressingPool string - for _, pool := range pools { - if mcomcfgv1.IsMachineConfigPoolConditionTrue(pool.Status.Conditions, mcfgv1.MachineConfigPoolUpdating) { - progressingPool = pool.Name - break - } - } - return progressingPool -} - func (status *StatusManager) findMachineConfigPoolsForLabel(mcPools []mcfgv1.MachineConfigPool, mcLabel labels.Set) ([]mcfgv1.MachineConfigPool, error) { var mcps []mcfgv1.MachineConfigPool for _, mcPool := range mcPools { diff --git a/pkg/controller/statusmanager/pod_status.go b/pkg/controller/statusmanager/pod_status.go index d548cd4ae1..f5003206c7 100644 --- a/pkg/controller/statusmanager/pod_status.go +++ b/pkg/controller/statusmanager/pod_status.go @@ -14,6 +14,7 @@ import ( operv1 "github.com/openshift/api/operator/v1" "github.com/openshift/cluster-network-operator/pkg/names" "github.com/openshift/cluster-network-operator/pkg/util" + cohelpers "github.com/openshift/library-go/pkg/config/clusteroperator/v1helpers" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" @@ -38,6 +39,7 @@ type podState struct { DaemonsetStates []daemonsetState DeploymentStates []deploymentState StatefulsetStates []statefulsetState + InstallComplete bool } // daemonsetState is the internal state we use to check if a rollout has @@ -47,6 +49,7 @@ type daemonsetState struct { LastSeenStatus appsv1.DaemonSetStatus LastChangeTime time.Time + RolloutActive bool } // deploymentState is the same as daemonsetState.. but for deployments! @@ -55,6 +58,7 @@ type deploymentState struct { LastSeenStatus appsv1.DeploymentStatus LastChangeTime time.Time + RolloutActive bool } // statefulsetState is the same as daemonsetState.. but for statefulsets! @@ -63,6 +67,7 @@ type statefulsetState struct { LastSeenStatus appsv1.StatefulSetStatus LastChangeTime time.Time + RolloutActive bool } // SetFromPods sets the operator Degraded/Progressing/Available status, based on @@ -79,7 +84,10 @@ func (status *StatusManager) SetFromPods() { progressing := []string{} hung := []string{} - daemonsetStates, deploymentStates, statefulsetStates := status.getLastPodState() + daemonsetStates, deploymentStates, statefulsetStates, installComplete := status.getLastPodState() + if !status.installComplete && installComplete { + status.installComplete = true + } if (len(daemonSets) + len(deployments) + len(statefulSets)) == 0 { progressing = append(progressing, "Deploying") @@ -87,6 +95,9 @@ func (status *StatusManager) SetFromPods() { for _, ds := range daemonSets { dsName := NewClusteredName(ds) + dsState, hadState := daemonsetStates[dsName] + dsState.RolloutActive = daemonSetRolloutActive(ds, dsState.RolloutActive, status.installComplete) + dsRolloutActive := dsState.RolloutActive dsProgressing := false @@ -97,13 +108,14 @@ func (status *StatusManager) SetFromPods() { progressing = append(progressing, fmt.Sprintf("DaemonSet %q update is rolling out (%d out of %d updated)", dsName.String(), ds.Status.UpdatedNumberScheduled, ds.Status.DesiredNumberScheduled)) dsProgressing = true } else if ds.Status.NumberUnavailable > 0 { - progressing = append(progressing, fmt.Sprintf("DaemonSet %q is not available (awaiting %d nodes)", dsName.String(), ds.Status.NumberUnavailable)) - dsProgressing = true - // Check for any pods in CrashLoopBackOff state and mark the operator as degraded if so. + if dsRolloutActive { + progressing = append(progressing, fmt.Sprintf("DaemonSet %q is not available (awaiting %d nodes)", dsName.String(), ds.Status.NumberUnavailable)) + dsProgressing = true + } if !isNonCritical(ds) { hung = append(hung, status.CheckCrashLoopBackOffPods(dsName, ds.Spec.Selector.MatchLabels, "DaemonSet")...) } - } else if ds.Status.NumberAvailable == 0 && ds.Status.DesiredNumberScheduled > 0 { + } else if ds.Status.NumberAvailable == 0 && dsRolloutActive { progressing = append(progressing, fmt.Sprintf("DaemonSet %q is not yet scheduled on any nodes", dsName.String())) dsProgressing = true } else if ds.Generation > ds.Status.ObservedGeneration { @@ -120,19 +132,20 @@ func (status *StatusManager) SetFromPods() { if dsProgressing && !isNonCritical(ds) { reachedAvailableLevel = false - dsState, exists := daemonsetStates[dsName] - if !exists || !reflect.DeepEqual(dsState.LastSeenStatus, ds.Status) { + if !hadState || !reflect.DeepEqual(dsState.LastSeenStatus, ds.Status) { dsState.LastChangeTime = time.Now() ds.Status.DeepCopyInto(&dsState.LastSeenStatus) - daemonsetStates[dsName] = dsState } // Catch hung rollouts - if exists && (time.Since(dsState.LastChangeTime)) > ProgressTimeout { + if hadState && (time.Since(dsState.LastChangeTime)) > ProgressTimeout { hung = append(hung, fmt.Sprintf("DaemonSet %q rollout is not making progress - last change %s", dsName.String(), dsState.LastChangeTime.Format(time.RFC3339))) empty := "" dsHung = &empty } + } + if dsRolloutActive { + daemonsetStates[dsName] = dsState } else { delete(daemonsetStates, dsName) } @@ -143,6 +156,9 @@ func (status *StatusManager) SetFromPods() { for _, ss := range statefulSets { ssName := NewClusteredName(ss) + ssState, hadState := statefulsetStates[ssName] + ssState.RolloutActive = statefulSetRolloutActive(ss, ssState.RolloutActive, status.installComplete) + ssRolloutActive := ssState.RolloutActive ssProgressing := false @@ -153,13 +169,15 @@ func (status *StatusManager) SetFromPods() { progressing = append(progressing, fmt.Sprintf("StatefulSet %q update is rolling out (%d out of %d updated)", ssName.String(), ss.Status.UpdatedReplicas, ss.Status.Replicas)) ssProgressing = true } else if ss.Status.ReadyReplicas > 0 && ss.Status.ReadyReplicas < ss.Status.Replicas { - progressing = append(progressing, fmt.Sprintf("StatefulSet %q is not available (awaiting %d nodes)", ssName.String(), (ss.Status.Replicas-ss.Status.ReadyReplicas))) - ssProgressing = true + if ssRolloutActive { + progressing = append(progressing, fmt.Sprintf("StatefulSet %q is not available (awaiting %d nodes)", ssName.String(), (ss.Status.Replicas-ss.Status.ReadyReplicas))) + ssProgressing = true + } // Check for any pods in CrashLoopBackOff state and mark the operator as degraded if so. if !isNonCritical(ss) { hung = append(hung, status.CheckCrashLoopBackOffPods(ssName, ss.Spec.Selector.MatchLabels, "StatefulSet")...) } - } else if ss.Status.AvailableReplicas == 0 { + } else if ss.Status.AvailableReplicas == 0 && ssRolloutActive { progressing = append(progressing, fmt.Sprintf("StatefulSet %q is not yet scheduled on any nodes", ssName.String())) ssProgressing = true } else if ss.Status.ObservedGeneration < ss.Generation { @@ -176,19 +194,20 @@ func (status *StatusManager) SetFromPods() { if ssProgressing && !isNonCritical(ss) { reachedAvailableLevel = false - ssState, exists := statefulsetStates[ssName] - if !exists || !reflect.DeepEqual(ssState.LastSeenStatus, ss.Status) { + if !hadState || !reflect.DeepEqual(ssState.LastSeenStatus, ss.Status) { ssState.LastChangeTime = time.Now() ss.Status.DeepCopyInto(&ssState.LastSeenStatus) - statefulsetStates[ssName] = ssState } // Catch hung rollouts - if exists && (time.Since(ssState.LastChangeTime)) > ProgressTimeout { + if hadState && (time.Since(ssState.LastChangeTime)) > ProgressTimeout { hung = append(hung, fmt.Sprintf("StatefulSet %q rollout is not making progress - last change %s", ssName.String(), ssState.LastChangeTime.Format(time.RFC3339))) empty := "" ssHung = &empty } + } + if ssRolloutActive { + statefulsetStates[ssName] = ssState } else { delete(statefulsetStates, ssName) } @@ -199,6 +218,9 @@ func (status *StatusManager) SetFromPods() { for _, dep := range deployments { depName := NewClusteredName(dep) + depState, hadState := deploymentStates[depName] + depState.RolloutActive = deploymentRolloutActive(dep, depState.RolloutActive, status.installComplete) + depRolloutActive := depState.RolloutActive depProgressing := false if isNonCritical(dep) && dep.Status.UnavailableReplicas > 0 && !status.installComplete { @@ -208,13 +230,15 @@ func (status *StatusManager) SetFromPods() { progressing = append(progressing, fmt.Sprintf("Deployment %q update is rolling out (%d out of %d updated)", depName.String(), dep.Status.UpdatedReplicas, dep.Status.Replicas)) depProgressing = true } else if dep.Status.UnavailableReplicas > 0 { - progressing = append(progressing, fmt.Sprintf("Deployment %q is not available (awaiting %d nodes)", depName.String(), dep.Status.UnavailableReplicas)) - depProgressing = true + if depRolloutActive { + progressing = append(progressing, fmt.Sprintf("Deployment %q is not available (awaiting %d nodes)", depName.String(), dep.Status.UnavailableReplicas)) + depProgressing = true + } // Check for any pods in CrashLoopBackOff state and mark the operator as degraded if so. if !isNonCritical(dep) { hung = append(hung, status.CheckCrashLoopBackOffPods(depName, dep.Spec.Selector.MatchLabels, "Deployment")...) } - } else if dep.Status.AvailableReplicas == 0 { + } else if dep.Status.AvailableReplicas == 0 && depRolloutActive { progressing = append(progressing, fmt.Sprintf("Deployment %q is not yet scheduled on any nodes", depName.String())) depProgressing = true } else if dep.Status.ObservedGeneration < dep.Generation { @@ -231,19 +255,20 @@ func (status *StatusManager) SetFromPods() { if depProgressing && !isNonCritical(dep) { reachedAvailableLevel = false - depState, exists := deploymentStates[depName] - if !exists || !reflect.DeepEqual(depState.LastSeenStatus, dep.Status) { + if !hadState || !reflect.DeepEqual(depState.LastSeenStatus, dep.Status) { depState.LastChangeTime = time.Now() dep.Status.DeepCopyInto(&depState.LastSeenStatus) - deploymentStates[depName] = depState } // Catch hung rollouts - if exists && (time.Since(depState.LastChangeTime)) > ProgressTimeout { + if hadState && (time.Since(depState.LastChangeTime)) > ProgressTimeout { hung = append(hung, fmt.Sprintf("Deployment %q rollout is not making progress - last change %s", depName.String(), depState.LastChangeTime.Format(time.RFC3339))) empty := "" depHung = &empty } + } + if depRolloutActive { + deploymentStates[depName] = depState } else { delete(deploymentStates, depName) } @@ -253,7 +278,10 @@ func (status *StatusManager) SetFromPods() { } status.setNotDegraded(PodDeployment) - if err := status.setLastPodState(daemonsetStates, deploymentStates, statefulsetStates); err != nil { + if reachedAvailableLevel && len(progressing) == 0 { + status.installComplete = true + } + if err := status.setLastPodState(daemonsetStates, deploymentStates, statefulsetStates, status.installComplete); err != nil { log.Printf("Failed to set pod state (continuing): %+v\n", err) } @@ -269,10 +297,6 @@ func (status *StatusManager) SetFromPods() { Status: operv1.ConditionTrue}) } - if reachedAvailableLevel && len(progressing) == 0 { - status.installComplete = true - } - if len(hung) > 0 { status.setDegraded(RolloutHung, "RolloutHung", strings.Join(hung, "\n")) } else { @@ -280,10 +304,64 @@ func (status *StatusManager) SetFromPods() { } } +// We only want pod unavailability to count as Progressing when we already know a +// CNO-managed rollout is in flight. The status snapshots below distinguish: +// - rollout started: controller still processing a spec change +// - rollout complete: controller has observed that change and all replicas are healthy +// If we have neither signal after install, the same "unavailable" counters are +// treated as ordinary node reboot churn rather than a network rollout. +func daemonSetRolloutActive(ds *appsv1.DaemonSet, rolloutActive, installComplete bool) bool { + rolloutStarted := ds.Generation > ds.Status.ObservedGeneration || ds.Status.UpdatedNumberScheduled < ds.Status.DesiredNumberScheduled + rolloutComplete := ds.Status.ObservedGeneration >= expectedGeneration(ds.Generation) && + ds.Status.NumberUnavailable == 0 && + (ds.Status.DesiredNumberScheduled == 0 || ds.Status.UpdatedNumberScheduled >= ds.Status.DesiredNumberScheduled) && + (ds.Status.DesiredNumberScheduled == 0 || ds.Status.NumberAvailable >= ds.Status.DesiredNumberScheduled) + + return updateRolloutActive(rolloutActive, installComplete, rolloutStarted, rolloutComplete) +} + +func statefulSetRolloutActive(ss *appsv1.StatefulSet, rolloutActive, installComplete bool) bool { + rolloutStarted := ss.Generation > ss.Status.ObservedGeneration || ss.Status.UpdatedReplicas < ss.Status.Replicas + rolloutComplete := ss.Status.ObservedGeneration >= expectedGeneration(ss.Generation) && + ss.Status.UpdatedReplicas >= ss.Status.Replicas && + ss.Status.ReadyReplicas >= ss.Status.Replicas + + return updateRolloutActive(rolloutActive, installComplete, rolloutStarted, rolloutComplete) +} + +func deploymentRolloutActive(dep *appsv1.Deployment, rolloutActive, installComplete bool) bool { + rolloutStarted := dep.Generation > dep.Status.ObservedGeneration || dep.Status.UpdatedReplicas < dep.Status.Replicas + rolloutComplete := dep.Status.ObservedGeneration >= expectedGeneration(dep.Generation) && + dep.Status.UpdatedReplicas >= dep.Status.Replicas && + dep.Status.UnavailableReplicas == 0 && + (dep.Status.Replicas == 0 || dep.Status.AvailableReplicas >= dep.Status.Replicas) + + return updateRolloutActive(rolloutActive, installComplete, rolloutStarted, rolloutComplete) +} + +// Once install is complete, only explicit rollout signals should reactivate Progressing. +func updateRolloutActive(rolloutActive, installComplete, rolloutStarted, rolloutComplete bool) bool { + if !installComplete || rolloutStarted { + rolloutActive = true + } + if rolloutActive && rolloutComplete { + return false + } + return rolloutActive +} + +// Real workload objects start at generation 1; tests often omit it and leave the zero value. +func expectedGeneration(generation int64) int64 { + if generation > 0 { + return generation + } + return 1 +} + // getLastPodState reads the last-seen daemonset + deployment + statefulset // states from the clusteroperator annotation and parses it. On error, it // returns an empty state, since this should not block updating operator status. -func (status *StatusManager) getLastPodState() (map[ClusteredName]daemonsetState, map[ClusteredName]deploymentState, map[ClusteredName]statefulsetState) { +func (status *StatusManager) getLastPodState() (map[ClusteredName]daemonsetState, map[ClusteredName]deploymentState, map[ClusteredName]statefulsetState, bool) { // with maps allocated daemonsetStates := map[ClusteredName]daemonsetState{} deploymentStates := map[ClusteredName]deploymentState{} @@ -294,12 +372,12 @@ func (status *StatusManager) getLastPodState() (map[ClusteredName]daemonsetState err := status.client.ClientFor("").CRClient().Get(context.TODO(), types.NamespacedName{Name: status.name}, co) if err != nil { log.Printf("Failed to get ClusterOperator: %v", err) - return daemonsetStates, deploymentStates, statefulsetStates + return daemonsetStates, deploymentStates, statefulsetStates, false } lsbytes := co.Annotations[lastSeenAnnotation] if lsbytes == "" { - return daemonsetStates, deploymentStates, statefulsetStates + return daemonsetStates, deploymentStates, statefulsetStates, false } out := podState{} @@ -307,7 +385,7 @@ func (status *StatusManager) getLastPodState() (map[ClusteredName]daemonsetState if err != nil { // No need to return error; just move on log.Printf("failed to unmashal last-seen-status: %v", err) - return daemonsetStates, deploymentStates, statefulsetStates + return daemonsetStates, deploymentStates, statefulsetStates, false } for _, ds := range out.DaemonsetStates { @@ -322,18 +400,42 @@ func (status *StatusManager) getLastPodState() (map[ClusteredName]daemonsetState statefulsetStates[ss.ClusteredName] = ss } - return daemonsetStates, deploymentStates, statefulsetStates + installComplete, err := installCompleteFromLastPodState(lsbytes, co.Status.Conditions) + if err != nil { + log.Printf("failed to unmarshal last-seen install-complete state: %v", err) + return daemonsetStates, deploymentStates, statefulsetStates, false + } + + return daemonsetStates, deploymentStates, statefulsetStates, installComplete +} + +func installCompleteFromLastPodState(annotation string, conditions []configv1.ClusterOperatorStatusCondition) (bool, error) { + raw := struct { + InstallComplete *bool `json:"InstallComplete"` + }{} + if err := json.Unmarshal([]byte(annotation), &raw); err != nil { + return false, err + } + if raw.InstallComplete != nil { + return *raw.InstallComplete, nil + } + + // Older annotations predate InstallComplete; fall back to the persisted + // ClusterOperator availability so upgrades do not re-enter bootstrap mode. + return cohelpers.IsStatusConditionTrue(conditions, configv1.OperatorAvailable), nil } func (status *StatusManager) setLastPodState( dss map[ClusteredName]daemonsetState, deps map[ClusteredName]deploymentState, - sss map[ClusteredName]statefulsetState) error { + sss map[ClusteredName]statefulsetState, + installComplete bool) error { ps := podState{ DaemonsetStates: make([]daemonsetState, 0, len(dss)), DeploymentStates: make([]deploymentState, 0, len(deps)), StatefulsetStates: make([]statefulsetState, 0, len(sss)), + InstallComplete: installComplete, } for nsn, ds := range dss { diff --git a/pkg/controller/statusmanager/status_manager_test.go b/pkg/controller/statusmanager/status_manager_test.go index d3ef6d3ecb..198bfc0125 100644 --- a/pkg/controller/statusmanager/status_manager_test.go +++ b/pkg/controller/statusmanager/status_manager_test.go @@ -819,6 +819,165 @@ func TestStatusManagerSetFromIPsecConfigs(t *testing.T) { } } +func TestStatusManagerSetFromMachineConfigPoolIgnoresNodeRebootChurn(t *testing.T) { + client := fake.NewFakeClient() + status := New(client, "testing", names.StandAloneClusterName) + setFakeListers(status) + no := &operv1.Network{ObjectMeta: metav1.ObjectMeta{Name: names.OPERATOR_CONFIG}, + Spec: operv1.NetworkSpec{DefaultNetwork: operv1.DefaultNetworkDefinition{ + OVNKubernetesConfig: &operv1.OVNKubernetesConfig{IPsecConfig: &operv1.IPsecConfig{Mode: operv1.IPsecModeFull}}}}} + setOC(t, client, no) + setCO(t, client, "testing") + + masterIPsecMachineConfig := mcfgv1.MachineConfig{ObjectMeta: metav1.ObjectMeta{Name: masterMachineConfigIPsecExtName, + Labels: names.MasterRoleMachineConfigLabel(), + OwnerReferences: networkOwnerRef()}, + Spec: mcfgv1.MachineConfigSpec{Extensions: []string{"ipsec"}}} + if err := status.SetMachineConfigs(t.Context(), []mcfgv1.MachineConfig{masterIPsecMachineConfig}); err != nil { + t.Fatalf("error setting machine configs: %v", err) + } + + masterPool := mcfgv1.MachineConfigPool{ + ObjectMeta: metav1.ObjectMeta{Name: "master"}, + Spec: mcfgv1.MachineConfigPoolSpec{MachineConfigSelector: &metav1.LabelSelector{ + MatchLabels: names.MasterRoleMachineConfigLabel(), + }}, + Status: mcfgv1.MachineConfigPoolStatus{ + MachineCount: 3, + ReadyMachineCount: 2, + UpdatedMachineCount: 2, + Configuration: mcfgv1.MachineConfigPoolStatusConfiguration{ + Source: []v1.ObjectReference{{Name: masterMachineConfigIPsecExtName}}, + }, + Conditions: []mcfgv1.MachineConfigPoolCondition{{ + Type: mcfgv1.MachineConfigPoolUpdating, + Status: v1.ConditionTrue, + }}, + }, + } + if err := status.SetFromMachineConfigPool([]mcfgv1.MachineConfigPool{masterPool}); err != nil { + t.Fatalf("error processing machine config pools: %v", err) + } + + _, oc, err := getStatuses(client, "testing") + if err != nil { + t.Fatalf("error getting ClusterOperator: %v", err) + } + if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{{ + Type: operv1.OperatorStatusTypeProgressing, + Status: operv1.ConditionFalse, + }}) { + t.Fatalf("unexpected Status.Conditions: %#v", oc.Status.Conditions) + } +} + +func TestStatusManagerSetFromMachineConfigPoolWaitsForAllMatchingPoolsOnRemoval(t *testing.T) { + client := fake.NewFakeClient() + status := New(client, "testing", names.StandAloneClusterName) + setFakeListers(status) + no := &operv1.Network{ObjectMeta: metav1.ObjectMeta{Name: names.OPERATOR_CONFIG}, + Spec: operv1.NetworkSpec{DefaultNetwork: operv1.DefaultNetworkDefinition{ + OVNKubernetesConfig: &operv1.OVNKubernetesConfig{IPsecConfig: &operv1.IPsecConfig{Mode: operv1.IPsecModeFull}}}}} + setOC(t, client, no) + setCO(t, client, "testing") + + workerIPsecMachineConfig := mcfgv1.MachineConfig{ObjectMeta: metav1.ObjectMeta{Name: workerMachineConfigIPsecExtName, + Labels: names.WorkerRoleMachineConfigLabel(), + OwnerReferences: networkOwnerRef()}, + Spec: mcfgv1.MachineConfigSpec{Extensions: []string{"ipsec"}}} + if err := status.SetMachineConfigs(t.Context(), []mcfgv1.MachineConfig{workerIPsecMachineConfig}); err != nil { + t.Fatalf("error setting machine configs: %v", err) + } + + workerPool := mcfgv1.MachineConfigPool{ + ObjectMeta: metav1.ObjectMeta{Name: "worker"}, + Spec: mcfgv1.MachineConfigPoolSpec{MachineConfigSelector: &metav1.LabelSelector{ + MatchLabels: names.WorkerRoleMachineConfigLabel(), + }}, + Status: mcfgv1.MachineConfigPoolStatus{ + Configuration: mcfgv1.MachineConfigPoolStatusConfiguration{ + Source: []v1.ObjectReference{{Name: workerMachineConfigIPsecExtName}}, + }, + }, + } + customWorkerPool := mcfgv1.MachineConfigPool{ + ObjectMeta: metav1.ObjectMeta{Name: "worker-custom"}, + Spec: mcfgv1.MachineConfigPoolSpec{MachineConfigSelector: &metav1.LabelSelector{ + MatchLabels: names.WorkerRoleMachineConfigLabel(), + }}, + Status: mcfgv1.MachineConfigPoolStatus{ + Configuration: mcfgv1.MachineConfigPoolStatusConfiguration{ + Source: []v1.ObjectReference{{Name: workerMachineConfigIPsecExtName}}, + }, + }, + } + if err := status.SetFromMachineConfigPool([]mcfgv1.MachineConfigPool{workerPool, customWorkerPool}); err != nil { + t.Fatalf("error processing machine config pools: %v", err) + } + + if err := status.SetMachineConfigs(t.Context(), []mcfgv1.MachineConfig{}); err != nil { + t.Fatalf("error setting machine configs: %v", err) + } + + customWorkerPool.Status.Configuration.Source = nil + if err := status.SetFromMachineConfigPool([]mcfgv1.MachineConfigPool{workerPool, customWorkerPool}); err != nil { + t.Fatalf("error processing machine config pools: %v", err) + } + + _, oc, err := getStatuses(client, "testing") + if err != nil { + t.Fatalf("error getting ClusterOperator: %v", err) + } + if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{{ + Type: operv1.OperatorStatusTypeProgressing, + Status: operv1.ConditionTrue, + }}) { + t.Fatalf("unexpected Status.Conditions: %#v", oc.Status.Conditions) + } + if !status.renderedMachineConfigs["worker"].Has(workerMachineConfigIPsecExtName) { + t.Fatalf("expected rendered machine config to stay cached until every matching pool removes it: %#v", status.renderedMachineConfigs) + } + if !status.machineConfigsBeingRemoved["worker"].Has(workerMachineConfigIPsecExtName) { + t.Fatalf("expected machine config removal state to stay cached until every matching pool removes it: %#v", status.machineConfigsBeingRemoved) + } + renderedMachineConfigs, err := status.getLastRenderedMachineConfigState() + if err != nil { + t.Fatalf("error getting rendered machine config state: %v", err) + } + if !renderedMachineConfigs["worker"].Has(workerMachineConfigIPsecExtName) { + t.Fatalf("expected rendered machine config annotation to keep %s until every matching pool removes it: %#v", workerMachineConfigIPsecExtName, renderedMachineConfigs) + } + + workerPool.Status.Configuration.Source = nil + if err := status.SetFromMachineConfigPool([]mcfgv1.MachineConfigPool{workerPool, customWorkerPool}); err != nil { + t.Fatalf("error processing machine config pools: %v", err) + } + + _, oc, err = getStatuses(client, "testing") + if err != nil { + t.Fatalf("error getting ClusterOperator: %v", err) + } + if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{{ + Type: operv1.OperatorStatusTypeProgressing, + Status: operv1.ConditionFalse, + }}) { + t.Fatalf("unexpected Status.Conditions: %#v", oc.Status.Conditions) + } + if _, ok := status.renderedMachineConfigs["worker"]; ok { + t.Fatalf("expected rendered machine config cache to be cleared after all matching pools remove it: %#v", status.renderedMachineConfigs) + } + if _, ok := status.machineConfigsBeingRemoved["worker"]; ok { + t.Fatalf("expected machine config removal state to be cleared after all matching pools remove it: %#v", status.machineConfigsBeingRemoved) + } + renderedMachineConfigs, err = status.getLastRenderedMachineConfigState() + if err != nil { + t.Fatalf("error getting rendered machine config state: %v", err) + } + if _, ok := renderedMachineConfigs["worker"]; ok { + t.Fatalf("expected rendered machine config annotation to be cleared after all matching pools remove it: %#v", renderedMachineConfigs) + } +} + func TestStatusManagerSetFromDaemonSets(t *testing.T) { client := fake.NewFakeClient() status := New(client, "testing", names.StandAloneClusterName) @@ -1316,7 +1475,8 @@ func TestStatusManagerSetFromDaemonSets(t *testing.T) { set(t, client, dsNC) status.SetFromPods() - // We should now be Progressing, but not un-Available + // Non-critical daemonsets that are merely unavailable after install should not + // move the operator into Progressing without an actual rollout in flight. co, oc, err = getStatuses(client, "testing") if err != nil { t.Fatalf("error getting ClusterOperator: %v", err) @@ -1328,7 +1488,7 @@ func TestStatusManagerSetFromDaemonSets(t *testing.T) { }, { Type: operv1.OperatorStatusTypeProgressing, - Status: operv1.ConditionTrue, + Status: operv1.ConditionFalse, }, { Type: operv1.OperatorStatusTypeUpgradeable, @@ -1368,7 +1528,7 @@ func TestStatusManagerSetFromDaemonSets(t *testing.T) { }, { Type: operv1.OperatorStatusTypeProgressing, - Status: operv1.ConditionTrue, + Status: operv1.ConditionFalse, }, { Type: operv1.OperatorStatusTypeUpgradeable, @@ -1522,7 +1682,7 @@ func TestStatusManagerSetFromDeployments(t *testing.T) { } depB := &appsv1.Deployment{ - ObjectMeta: metav1.ObjectMeta{Namespace: "one", Name: "beta", Labels: sl}, + ObjectMeta: metav1.ObjectMeta{Namespace: "one", Name: "beta", Generation: 1, Labels: sl}, Status: appsv1.DeploymentStatus{ Replicas: 1, UpdatedReplicas: 1, @@ -1644,6 +1804,44 @@ func TestStatusManagerSetFromDeployments(t *testing.T) { t.Fatalf("unexpected Status.Versions: %#v", co.Status.Versions) } + depB.Status.UpdatedReplicas = depB.Status.Replicas + depB.Status.UnavailableReplicas = 1 + depB.Status.AvailableReplicas = 0 + depB.Status.ObservedGeneration = depB.Generation + + setStatus(t, client, depB) + status.SetFromPods() + + co, oc, err = getStatuses(client, "testing") + if err != nil { + t.Fatalf("error getting ClusterOperator: %v", err) + } + // We should still be Progressing even after the deployment controller has + // observed the new generation, because the rollout is still converging. + if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{ + { + Type: operv1.OperatorStatusTypeDegraded, + Status: operv1.ConditionFalse, + }, + { + Type: operv1.OperatorStatusTypeProgressing, + Status: operv1.ConditionTrue, + }, + { + Type: operv1.OperatorStatusTypeUpgradeable, + Status: operv1.ConditionTrue, + }, + { + Type: operv1.OperatorStatusTypeAvailable, + Status: operv1.ConditionTrue, + }, + }) { + t.Fatalf("unexpected Status.Conditions: %#v", oc.Status.Conditions) + } + if len(co.Status.Versions) != 1 { + t.Fatalf("unexpected Status.Versions: %#v", co.Status.Versions) + } + // intermission: set back last-seen times by an hour, see that we mark // as hung ps = getLastPodState(t, client, "testing") @@ -1695,6 +1893,7 @@ func TestStatusManagerSetFromDeployments(t *testing.T) { depB.Status.UpdatedReplicas = depB.Status.Replicas depB.Status.UnavailableReplicas = 0 depB.Status.AvailableReplicas = depB.Status.Replicas + depB.Status.ObservedGeneration = depB.Generation setStatus(t, client, depB) status.SetFromPods() @@ -1725,6 +1924,373 @@ func TestStatusManagerSetFromDeployments(t *testing.T) { if len(co.Status.Versions) != 1 { t.Fatalf("unexpected Status.Versions: %#v", co.Status.Versions) } + + err = client.ClientFor("").CRClient().Get(t.Context(), types.NamespacedName{Namespace: depB.Namespace, Name: depB.Name}, depB) + if err != nil { + t.Fatalf("error getting Deployment: %v", err) + } + + depB.Status.UnavailableReplicas = 1 + depB.Status.AvailableReplicas = 0 + setStatus(t, client, depB) + status.SetFromPods() + + _, oc, err = getStatuses(client, "testing") + if err != nil { + t.Fatalf("error getting ClusterOperator: %v", err) + } + // A single unavailable replica without a rollout in flight should not set + // Progressing; this is the reboot-churn case. + if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{ + { + Type: operv1.OperatorStatusTypeDegraded, + Status: operv1.ConditionFalse, + }, + { + Type: operv1.OperatorStatusTypeProgressing, + Status: operv1.ConditionFalse, + }, + { + Type: operv1.OperatorStatusTypeUpgradeable, + Status: operv1.ConditionTrue, + }, + { + Type: operv1.OperatorStatusTypeAvailable, + Status: operv1.ConditionTrue, + }, + }) { + t.Fatalf("unexpected Status.Conditions: %#v", oc.Status.Conditions) + } +} + +func TestStatusManagerRestoresInstallCompleteAfterRestart(t *testing.T) { + client := fake.NewFakeClient() + status := New(client, "testing", names.StandAloneClusterName) + status.clock = testingclock.NewFakeClock(time.Now()) + setFakeListers(status) + no := &operv1.Network{ObjectMeta: metav1.ObjectMeta{Name: names.OPERATOR_CONFIG}} + setOC(t, client, no) + + depA := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{Namespace: "one", Name: "alpha", Generation: 1, Labels: sl}, + Status: appsv1.DeploymentStatus{ + Replicas: 1, + UpdatedReplicas: 1, + AvailableReplicas: 1, + UnavailableReplicas: 0, + ObservedGeneration: 1, + }, + } + set(t, client, depA) + + status.SetFromPods() + + ps := getLastPodState(t, client, "testing") + if !ps.InstallComplete { + t.Fatal("expected installComplete to be persisted once pods are stable") + } + + depNC := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "one", + Name: "non-critical", + Generation: 1, + Annotations: map[string]string{ + names.NonCriticalAnnotation: "", + }, + Labels: sl, + }, + Status: appsv1.DeploymentStatus{ + Replicas: 1, + UpdatedReplicas: 1, + AvailableReplicas: 0, + UnavailableReplicas: 1, + ObservedGeneration: 1, + }, + } + set(t, client, depNC) + + restarted := New(client, "testing", names.StandAloneClusterName) + restarted.clock = testingclock.NewFakeClock(time.Now()) + setFakeListers(restarted) + restarted.SetFromPods() + + _, oc, err := getStatuses(client, "testing") + if err != nil { + t.Fatalf("error getting ClusterOperator: %v", err) + } + if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{ + { + Type: operv1.OperatorStatusTypeDegraded, + Status: operv1.ConditionFalse, + }, + { + Type: operv1.OperatorStatusTypeProgressing, + Status: operv1.ConditionFalse, + }, + { + Type: operv1.OperatorStatusTypeUpgradeable, + Status: operv1.ConditionTrue, + }, + { + Type: operv1.OperatorStatusTypeAvailable, + Status: operv1.ConditionTrue, + }, + }) { + t.Fatalf("unexpected Status.Conditions: %#v", oc.Status.Conditions) + } +} + +func TestStatusManagerRestoresInstallCompleteFromLegacyAnnotation(t *testing.T) { + client := fake.NewFakeClient() + status := New(client, "testing", names.StandAloneClusterName) + status.clock = testingclock.NewFakeClock(time.Now()) + setFakeListers(status) + no := &operv1.Network{ObjectMeta: metav1.ObjectMeta{Name: names.OPERATOR_CONFIG}} + setOC(t, client, no) + + depA := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{Namespace: "one", Name: "alpha", Generation: 1, Labels: sl}, + Status: appsv1.DeploymentStatus{ + Replicas: 1, + UpdatedReplicas: 1, + AvailableReplicas: 1, + UnavailableReplicas: 0, + ObservedGeneration: 1, + }, + } + set(t, client, depA) + + status.SetFromPods() + + ps := getLastPodState(t, client, "testing") + legacyPodState := struct { + DaemonsetStates []daemonsetState + DeploymentStates []deploymentState + StatefulsetStates []statefulsetState + }{ + DaemonsetStates: ps.DaemonsetStates, + DeploymentStates: ps.DeploymentStates, + StatefulsetStates: ps.StatefulsetStates, + } + co, err := getCO(client, "testing") + if err != nil { + t.Fatalf("error getting ClusterOperator: %v", err) + } + legacyStateBytes, err := json.Marshal(legacyPodState) + if err != nil { + t.Fatalf("error marshalling legacy pod state: %v", err) + } + co.Annotations[lastSeenAnnotation] = string(legacyStateBytes) + if err := client.ClientFor("").CRClient().Update(t.Context(), co); err != nil { + t.Fatalf("error updating ClusterOperator: %v", err) + } + + depNC := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "one", + Name: "non-critical", + Generation: 1, + Annotations: map[string]string{ + names.NonCriticalAnnotation: "", + }, + Labels: sl, + }, + Status: appsv1.DeploymentStatus{ + Replicas: 1, + UpdatedReplicas: 1, + AvailableReplicas: 0, + UnavailableReplicas: 1, + ObservedGeneration: 1, + }, + } + set(t, client, depNC) + + restarted := New(client, "testing", names.StandAloneClusterName) + restarted.clock = testingclock.NewFakeClock(time.Now()) + setFakeListers(restarted) + restarted.SetFromPods() + + if !restarted.installComplete { + t.Fatal("expected installComplete to be restored from ClusterOperator availability when legacy annotation omits it") + } + + _, oc, err := getStatuses(client, "testing") + if err != nil { + t.Fatalf("error getting ClusterOperator: %v", err) + } + if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{ + { + Type: operv1.OperatorStatusTypeDegraded, + Status: operv1.ConditionFalse, + }, + { + Type: operv1.OperatorStatusTypeProgressing, + Status: operv1.ConditionFalse, + }, + { + Type: operv1.OperatorStatusTypeUpgradeable, + Status: operv1.ConditionTrue, + }, + { + Type: operv1.OperatorStatusTypeAvailable, + Status: operv1.ConditionTrue, + }, + }) { + t.Fatalf("unexpected Status.Conditions: %#v", oc.Status.Conditions) + } +} + +func TestStatusManagerRestoresActiveRolloutAfterRestart(t *testing.T) { + client := fake.NewFakeClient() + status := New(client, "testing", names.StandAloneClusterName) + status.clock = testingclock.NewFakeClock(time.Now()) + setFakeListers(status) + no := &operv1.Network{ObjectMeta: metav1.ObjectMeta{Name: names.OPERATOR_CONFIG}} + setOC(t, client, no) + + depA := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{Namespace: "one", Name: "alpha", Generation: 1, Labels: sl}, + Status: appsv1.DeploymentStatus{ + Replicas: 1, + UpdatedReplicas: 1, + AvailableReplicas: 1, + UnavailableReplicas: 0, + ObservedGeneration: 1, + }, + } + set(t, client, depA) + status.SetFromPods() + + depB := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{Namespace: "one", Name: "beta", Generation: 1, Labels: sl}, + Status: appsv1.DeploymentStatus{ + Replicas: 1, + UpdatedReplicas: 0, + AvailableReplicas: 1, + UnavailableReplicas: 0, + ObservedGeneration: 0, + }, + Spec: appsv1.DeploymentSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "beta"}, + }, + }, + } + set(t, client, depB) + status.SetFromPods() + + depB.Status.UpdatedReplicas = depB.Status.Replicas + depB.Status.AvailableReplicas = 0 + depB.Status.UnavailableReplicas = 1 + depB.Status.ObservedGeneration = depB.Generation + setStatus(t, client, depB) + + restarted := New(client, "testing", names.StandAloneClusterName) + restarted.clock = testingclock.NewFakeClock(time.Now()) + setFakeListers(restarted) + restarted.SetFromPods() + + _, oc, err := getStatuses(client, "testing") + if err != nil { + t.Fatalf("error getting ClusterOperator: %v", err) + } + if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{ + { + Type: operv1.OperatorStatusTypeDegraded, + Status: operv1.ConditionFalse, + }, + { + Type: operv1.OperatorStatusTypeProgressing, + Status: operv1.ConditionTrue, + }, + { + Type: operv1.OperatorStatusTypeUpgradeable, + Status: operv1.ConditionTrue, + }, + { + Type: operv1.OperatorStatusTypeAvailable, + Status: operv1.ConditionTrue, + }, + }) { + t.Fatalf("unexpected Status.Conditions: %#v", oc.Status.Conditions) + } +} + +func TestStatusManagerRestoresStatefulSetActiveRolloutAfterRestart(t *testing.T) { + client := fake.NewFakeClient() + status := New(client, "testing", names.StandAloneClusterName) + status.clock = testingclock.NewFakeClock(time.Now()) + setFakeListers(status) + no := &operv1.Network{ObjectMeta: metav1.ObjectMeta{Name: names.OPERATOR_CONFIG}} + setOC(t, client, no) + + ssA := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{Namespace: "one", Name: "alpha", Generation: 1, Labels: sl}, + Status: appsv1.StatefulSetStatus{ + Replicas: 1, + UpdatedReplicas: 1, + ReadyReplicas: 1, + AvailableReplicas: 1, + ObservedGeneration: 1, + }, + } + set(t, client, ssA) + status.SetFromPods() + + ssB := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{Namespace: "one", Name: "beta", Generation: 1, Labels: sl}, + Status: appsv1.StatefulSetStatus{ + Replicas: 2, + UpdatedReplicas: 0, + ReadyReplicas: 2, + AvailableReplicas: 2, + ObservedGeneration: 0, + }, + Spec: appsv1.StatefulSetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "beta"}, + }, + }, + } + set(t, client, ssB) + status.SetFromPods() + + ssB.Status.UpdatedReplicas = ssB.Status.Replicas + ssB.Status.ReadyReplicas = ssB.Status.Replicas - 1 + ssB.Status.AvailableReplicas = ssB.Status.Replicas - 1 + ssB.Status.ObservedGeneration = ssB.Generation + setStatus(t, client, ssB) + + restarted := New(client, "testing", names.StandAloneClusterName) + restarted.clock = testingclock.NewFakeClock(time.Now()) + setFakeListers(restarted) + restarted.SetFromPods() + + _, oc, err := getStatuses(client, "testing") + if err != nil { + t.Fatalf("error getting ClusterOperator: %v", err) + } + if !conditionsInclude(oc.Status.Conditions, []operv1.OperatorCondition{ + { + Type: operv1.OperatorStatusTypeDegraded, + Status: operv1.ConditionFalse, + }, + { + Type: operv1.OperatorStatusTypeProgressing, + Status: operv1.ConditionTrue, + }, + { + Type: operv1.OperatorStatusTypeUpgradeable, + Status: operv1.ConditionTrue, + }, + { + Type: operv1.OperatorStatusTypeAvailable, + Status: operv1.ConditionTrue, + }, + }) { + t.Fatalf("unexpected Status.Conditions: %#v", oc.Status.Conditions) + } } func getLastPodState(t *testing.T, client cnoclient.Client, name string) podState { @@ -1924,9 +2490,10 @@ func TestStatusManagerCheckCrashLoopBackOffPods(t *testing.T) { // Check that crashlooping deployments also are detected dep := &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ - Namespace: "three", - Name: "gamma", - Labels: sl, + Namespace: "three", + Name: "gamma", + Generation: 1, + Labels: sl, }, Spec: appsv1.DeploymentSpec{ Selector: &metav1.LabelSelector{ diff --git a/pkg/util/machineconfig/util.go b/pkg/util/machineconfig/util.go index 8566e52751..f1f1c28fb1 100644 --- a/pkg/util/machineconfig/util.go +++ b/pkg/util/machineconfig/util.go @@ -26,21 +26,31 @@ func IsUserDefinedIPsecMachineConfig(machineConfig *mcfgv1.MachineConfig) bool { // AreMachineConfigsRenderedOnPool returns true if machineConfigs are completely rendered on the given machine config // pool status, otherwise returns false. func AreMachineConfigsRenderedOnPool(status mcfgv1.MachineConfigPoolStatus, machineConfigs sets.Set[string]) bool { - checkSource := func(sourceNames sets.Set[string], machineConfigs sets.Set[string]) bool { - return sourceNames.IsSuperset(machineConfigs) - } return status.MachineCount == status.UpdatedMachineCount && - checkSourceInMachineConfigPoolStatus(status, machineConfigs, checkSource) + AreMachineConfigsRenderedOnPoolSource(status, machineConfigs) } // AreMachineConfigsRemovedFromPool returns true if machineConfigs are completely removed on the given machine config // pool status, otherwise returns false. func AreMachineConfigsRemovedFromPool(status mcfgv1.MachineConfigPoolStatus, machineConfigs sets.Set[string]) bool { + return status.MachineCount == status.UpdatedMachineCount && + AreMachineConfigsRemovedFromPoolSource(status, machineConfigs) +} + +// AreMachineConfigsRenderedOnPoolSource returns true if machineConfigs are present in the pool's rendered source list. +func AreMachineConfigsRenderedOnPoolSource(status mcfgv1.MachineConfigPoolStatus, machineConfigs sets.Set[string]) bool { + checkSource := func(sourceNames sets.Set[string], machineConfigs sets.Set[string]) bool { + return sourceNames.IsSuperset(machineConfigs) + } + return checkSourceInMachineConfigPoolStatus(status, machineConfigs, checkSource) +} + +// AreMachineConfigsRemovedFromPoolSource returns true if machineConfigs are absent from the pool's rendered source list. +func AreMachineConfigsRemovedFromPoolSource(status mcfgv1.MachineConfigPoolStatus, machineConfigs sets.Set[string]) bool { checkSource := func(sourceNames sets.Set[string], machineConfigs sets.Set[string]) bool { return !sourceNames.HasAny(machineConfigs.UnsortedList()...) } - return status.MachineCount == status.UpdatedMachineCount && - checkSourceInMachineConfigPoolStatus(status, machineConfigs, checkSource) + return checkSourceInMachineConfigPoolStatus(status, machineConfigs, checkSource) } func checkSourceInMachineConfigPoolStatus(machineConfigStatus mcfgv1.MachineConfigPoolStatus, machineConfigs sets.Set[string],