Skip to content

Commit 73491b8

Browse files
committed
[RayCluster] Status includes head container status message
Signed-off-by: Spencer Peterson <[email protected]>
1 parent 79bd749 commit 73491b8

File tree

2 files changed

+95
-0
lines changed

2 files changed

+95
-0
lines changed

ray-operator/controllers/ray/utils/util.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,16 @@ func FindHeadPodReadyCondition(headPod *corev1.Pod) metav1.Condition {
9191
headPodReadyCondition.Status = metav1.ConditionStatus(cond.Status)
9292
headPodReadyCondition.Message = cond.Message
9393

94+
// Add details from failed or waiting container statuses if available.
95+
details := containerStatusDetails(headPod)
96+
if details != "" {
97+
if headPodReadyCondition.Message == "" {
98+
headPodReadyCondition.Message = details
99+
} else {
100+
headPodReadyCondition.Message += "; " + details
101+
}
102+
}
103+
94104
// Determine the reason; default to HeadPodRunningAndReady if the headPod is ready but no specific reason is provided
95105
reason := cond.Reason
96106
if cond.Status == corev1.ConditionTrue && reason == "" {
@@ -108,6 +118,18 @@ func FindHeadPodReadyCondition(headPod *corev1.Pod) metav1.Condition {
108118
return headPodReadyCondition
109119
}
110120

121+
func containerStatusDetails(pod *corev1.Pod) string {
122+
var details []string
123+
for _, status := range pod.Status.ContainerStatuses {
124+
if status.State.Waiting != nil {
125+
details = append(details, fmt.Sprintf("%s: %s: %s", status.Name, status.State.Waiting.Reason, status.State.Waiting.Message))
126+
} else if status.State.Terminated != nil {
127+
details = append(details, fmt.Sprintf("%s: %s: %s", status.Name, status.State.Terminated.Reason, status.State.Terminated.Message))
128+
}
129+
}
130+
return strings.Join(details, ", ")
131+
}
132+
111133
// FindRayClusterSuspendStatus returns the current suspend status from two conditions:
112134
// 1. rayv1.RayClusterSuspending
113135
// 2. rayv1.RayClusterSuspended

ray-operator/controllers/ray/utils/util_test.go

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -977,6 +977,79 @@ func TestFindHeadPodReadyCondition(t *testing.T) {
977977
}
978978
}
979979

980+
func TestFindHeadPodReadyMessage(t *testing.T) {
981+
tests := []struct {
982+
name string
983+
message string
984+
status []corev1.ContainerStatus
985+
expectedMessage string
986+
}{{
987+
name: "no message no status want nothing",
988+
}, {
989+
name: "only reason",
990+
message: "TooEarlyInTheMorning",
991+
expectedMessage: "TooEarlyInTheMorning",
992+
}, {
993+
name: "one reason one status",
994+
message: "containers not ready",
995+
status: []corev1.ContainerStatus{{
996+
Name: "ray",
997+
State: corev1.ContainerState{
998+
Waiting: &corev1.ContainerStateWaiting{
999+
Reason: "ImagePullBackOff",
1000+
Message: `Back-off pulling image royproject/roy:latest: ErrImagePull: rpc error: code = NotFound`,
1001+
},
1002+
},
1003+
}},
1004+
expectedMessage: `containers not ready; ray: ImagePullBackOff: Back-off pulling image royproject/roy:latest: ErrImagePull: rpc error: code = NotFound`,
1005+
}, {
1006+
name: "one reason two statuses",
1007+
message: "aesthetic problems",
1008+
status: []corev1.ContainerStatus{{
1009+
Name: "indigo",
1010+
State: corev1.ContainerState{
1011+
Waiting: &corev1.ContainerStateWaiting{
1012+
Reason: "BadColor",
1013+
Message: "too blue",
1014+
},
1015+
},
1016+
}, {
1017+
Name: "circle",
1018+
State: corev1.ContainerState{
1019+
Terminated: &corev1.ContainerStateTerminated{
1020+
Reason: "BadGeometry",
1021+
Message: "too round",
1022+
},
1023+
},
1024+
}},
1025+
expectedMessage: "aesthetic problems; indigo: BadColor: too blue, circle: BadGeometry: too round",
1026+
}, {
1027+
name: "no reason one status",
1028+
status: []corev1.ContainerStatus{{
1029+
Name: "my-image",
1030+
State: corev1.ContainerState{
1031+
Terminated: &corev1.ContainerStateTerminated{
1032+
Reason: "Crashed",
1033+
Message: "bash not found",
1034+
},
1035+
},
1036+
}},
1037+
expectedMessage: "my-image: Crashed: bash not found",
1038+
}}
1039+
1040+
for _, tc := range tests {
1041+
t.Run(tc.name, func(t *testing.T) {
1042+
pod := createRayHeadPodWithPhaseAndCondition(corev1.PodPending, corev1.PodReady, corev1.ConditionFalse)
1043+
pod.Status.Conditions[0].Message = tc.message
1044+
pod.Status.ContainerStatuses = tc.status
1045+
cond := FindHeadPodReadyCondition(pod)
1046+
if cond.Message != tc.expectedMessage {
1047+
t.Errorf("FindHeadPodReadyCondition(...) returned condition with message %q, but wanted %q", cond.Message, tc.expectedMessage)
1048+
}
1049+
})
1050+
}
1051+
}
1052+
9801053
func TestErrRayClusterReplicaFailureReason(t *testing.T) {
9811054
assert.Equal(t, "FailedDeleteAllPods", RayClusterReplicaFailureReason(ErrFailedDeleteAllPods))
9821055
assert.Equal(t, "FailedDeleteHeadPod", RayClusterReplicaFailureReason(ErrFailedDeleteHeadPod))

0 commit comments

Comments
 (0)