diff --git a/Makefile b/Makefile index 188566fc..96dd42a6 100644 --- a/Makefile +++ b/Makefile @@ -53,7 +53,7 @@ endif OPERATOR_SDK_VERSION ?= v1.36.1 # Image URL to use all building/pushing image targets -IMG ?= slinky.slurm.net/slurm-operator:$(VERSION) +IMG ?= ebennerv/slurm-operator:$(VERSION) # Set the namespaces that helm tests will run against SLURM_NAMESPACE ?= slurm diff --git a/cmd/manager/main.go b/cmd/manager/main.go index 38d5c587..9fa92d9d 100644 --- a/cmd/manager/main.go +++ b/cmd/manager/main.go @@ -21,10 +21,10 @@ import ( "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log/zap" - slinkyv1alpha1 "github.com/SlinkyProject/slurm-operator/api/v1alpha1" - "github.com/SlinkyProject/slurm-operator/internal/controller/cluster" - "github.com/SlinkyProject/slurm-operator/internal/controller/nodeset" - "github.com/SlinkyProject/slurm-operator/internal/resources" + slinkyv1alpha1 "github.com/togethercomputer/slurm-operator/api/v1alpha1" + "github.com/togethercomputer/slurm-operator/internal/controller/cluster" + "github.com/togethercomputer/slurm-operator/internal/controller/nodeset" + "github.com/togethercomputer/slurm-operator/internal/resources" //+kubebuilder:scaffold:imports ) diff --git a/cmd/webhook/main.go b/cmd/webhook/main.go index afe85af5..aa46f604 100644 --- a/cmd/webhook/main.go +++ b/cmd/webhook/main.go @@ -20,7 +20,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log/zap" "sigs.k8s.io/controller-runtime/pkg/metrics/server" - slinkyv1alpha1 "github.com/SlinkyProject/slurm-operator/api/v1alpha1" + slinkyv1alpha1 "github.com/togethercomputer/slurm-operator/api/v1alpha1" //+kubebuilder:scaffold:imports ) diff --git a/go.mod b/go.mod index aaa0c617..7885a90e 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,6 @@ -module github.com/SlinkyProject/slurm-operator +module github.com/togethercomputer/slurm-operator -go 1.22.0 - -toolchain go1.22.5 +go 1.25.0 require ( github.com/SlinkyProject/slurm-client 
v0.1.0-20241108145209-7bfdd4923f75 @@ -126,3 +124,5 @@ require ( sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect sigs.k8s.io/yaml v1.4.0 // indirect ) + +replace k8s.io/kubernetes => k8s.io/kubernetes v1.30.3 diff --git a/helm/slurm-operator/templates/operator/deployment.yaml b/helm/slurm-operator/templates/operator/deployment.yaml index 31c4cf5c..97216aca 100644 --- a/helm/slurm-operator/templates/operator/deployment.yaml +++ b/helm/slurm-operator/templates/operator/deployment.yaml @@ -27,6 +27,8 @@ spec: hostname: {{ include "slurm-operator.name" . }} priorityClassName: {{ default "" .Values.priorityClassName }} serviceAccountName: {{ include "slurm-operator.operator.serviceAccountName" . }} + tolerations: {{- toYaml .Values.operator.tolerations | nindent 6 }} + affinity: {{- toYaml .Values.operator.affinity | nindent 6 }} {{- include "slurm-operator.imagePullSecrets" . | nindent 6 }} containers: - name: slurm-operator diff --git a/helm/slurm-operator/templates/operator/rbac.yaml b/helm/slurm-operator/templates/operator/rbac.yaml index d0259251..b5e663ee 100644 --- a/helm/slurm-operator/templates/operator/rbac.yaml +++ b/helm/slurm-operator/templates/operator/rbac.yaml @@ -79,6 +79,7 @@ rules: - get - list - watch + - update - apiGroups: - "" resources: diff --git a/helm/slurm-operator/templates/webhook/deployment.yaml b/helm/slurm-operator/templates/webhook/deployment.yaml index 3dd0eb3d..6eee5ab3 100644 --- a/helm/slurm-operator/templates/webhook/deployment.yaml +++ b/helm/slurm-operator/templates/webhook/deployment.yaml @@ -29,6 +29,8 @@ spec: hostname: {{ include "slurm-operator.webhook.name" . }} priorityClassName: {{ default "" .Values.priorityClassName }} serviceAccountName: {{ include "slurm-operator.webhook.serviceAccountName" . }} + tolerations: {{- toYaml .Values.webhook.tolerations | nindent 6 }} + affinity: {{- toYaml .Values.webhook.affinity | nindent 6 }} {{- include "slurm-operator.imagePullSecrets" . 
| nindent 6 }} containers: - name: webhook diff --git a/helm/slurm-operator/values.yaml b/helm/slurm-operator/values.yaml index 8850a1e6..e23855dc 100644 --- a/helm/slurm-operator/values.yaml +++ b/helm/slurm-operator/values.yaml @@ -75,6 +75,11 @@ operator: # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity affinity: {} # + # -- (list) + # Configure pod tolerations. + # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ + tolerations: [] + # # -- (object) # Set container resource requests and limits for Kubernetes Pod scheduling. # Ref: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-requests-and-limits-of-pod-and-container diff --git a/helm/slurm/templates/_slurm.tpl b/helm/slurm/templates/_slurm.tpl index 8b116665..460ffd07 100644 --- a/helm/slurm/templates/_slurm.tpl +++ b/helm/slurm/templates/_slurm.tpl @@ -217,6 +217,27 @@ Define slurmd capabilities - SYS_NICE {{- end }} +{{/* +Determine login image repository +*/}} +{{- define "slurm.login.image.repository" -}} +{{- .Values.login.image.repository | default (printf "%s/sackd" (include "slurm.image.repository" .)) -}} +{{- end }} + +{{/* +Define login image tag +*/}} +{{- define "slurm.login.image.tag" -}} +{{- .Values.login.image.tag | default (include "slurm.image.tag" .) -}} +{{- end }} + +{{/* +Determine login image reference (repo:tag) +*/}} +{{- define "slurm.login.imageRef" -}} +{{- printf "%s:%s" (include "slurm.login.image.repository" .) (include "slurm.login.image.tag" .) | quote -}} +{{- end }} + {{/* Define restapi name */}} @@ -300,6 +321,30 @@ Define cluster secret name {{- printf "%s-token-%s" .Release.Name (include "slurm.user" .) 
-}} {{- end }} +{{/* +Define login name +*/}} +{{- define "slurm.login.name" -}} +{{ printf "%s-login" .Release.Name }} +{{- end }} + +{{/* +Define login labels +*/}} +{{- define "slurm.login.labels" -}} +app.kubernetes.io/component: login +{{ include "slurm.login.selectorLabels" . }} +{{ include "slurm.labels" . }} +{{- end }} + +{{/* +Define login selectorLabels +*/}} +{{- define "slurm.login.selectorLabels" -}} +app.kubernetes.io/name: login +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + {{/* Define token name */}} diff --git a/helm/slurm/templates/accounting/accounting-statefulset.yaml b/helm/slurm/templates/accounting/accounting-statefulset.yaml index ace99b75..7b0faad4 100644 --- a/helm/slurm/templates/accounting/accounting-statefulset.yaml +++ b/helm/slurm/templates/accounting/accounting-statefulset.yaml @@ -46,6 +46,10 @@ spec: affinity: {{- .Values.accounting.affinity | toYaml | nindent 8 }} {{- end }}{{- /* if .Values.accounting.affinity */}} + {{- if .Values.accounting.tolerations }} + tolerations: + {{- .Values.accounting.tolerations | toYaml | nindent 8 }} + {{- end }}{{- /* if .Values.accounting.tolerations */}} {{- include "slurm.imagePullSecrets" $ | nindent 6 }} initContainers: - name: init diff --git a/helm/slurm/templates/compute/compute-nodeset.yaml b/helm/slurm/templates/compute/compute-nodeset.yaml index 87187fac..24f39993 100644 --- a/helm/slurm/templates/compute/compute-nodeset.yaml +++ b/helm/slurm/templates/compute/compute-nodeset.yaml @@ -108,6 +108,8 @@ spec: ports: - name: slurmd containerPort: {{ include "slurm.compute.port" $ }} + - name: ssh + containerPort: 22 startupProbe: exec: command: @@ -135,6 +137,14 @@ spec: - name: {{ $vct.metadata.name }} mountPath: {{ $vct.spec.mountPath | default (printf "/mnt/%s" $vct.metadata.name ) }} {{- end }}{{- /* range $vct := $nodeset.volumeClaimTemplates */}} + - mountPath: /dev/shm + name: dshm + {{- if and $nodeset.persistence $nodeset.persistence.existingDataClaims }} + {{- 
range $nodeset.persistence.existingDataClaims }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + {{- end }}{{- /* range $nodeset.persistence.existingDataClaims */}} + {{- end }}{{- /* if $nodeset.persistence.existingDataClaims */}} volumes: {{- include "slurm.volumes" $ | nindent 8 }} - name: slurm-config @@ -143,6 +153,17 @@ spec: sources: - secret: name: {{ include "slurm.auth.secretName" $ }} + - emptyDir: + medium: Memory + sizeLimit: {{ $nodeset.shmSize }} + name: dshm + {{- if and $nodeset.persistence $nodeset.persistence.existingDataClaims }} + {{- range $nodeset.persistence.existingDataClaims }} + - name: {{ .name }} + persistentVolumeClaim: + claimName: {{ .name }} + {{- end }}{{- /* range $nodeset.persistence.existingDataClaims */}} + {{- end }}{{- /* if $nodeset.persistence.existingDataClaims */}} {{- if $nodeset.volumeClaimTemplates }} volumeClaimTemplates: {{- $nodeset.volumeClaimTemplates | toYaml | nindent 4 }} diff --git a/helm/slurm/templates/controller/controller-statefulset.yaml b/helm/slurm/templates/controller/controller-statefulset.yaml index b2125fcb..6120c615 100644 --- a/helm/slurm/templates/controller/controller-statefulset.yaml +++ b/helm/slurm/templates/controller/controller-statefulset.yaml @@ -39,6 +39,10 @@ spec: affinity: {{- .Values.controller.affinity | toYaml | nindent 8 }} {{- end }}{{- /* if .Values.controller.affinity */}} + {{- if .Values.controller.tolerations }} + tolerations: + {{- .Values.controller.tolerations | toYaml | nindent 8 }} + {{- end }}{{- /* if .Values.controller.tolerations */}} {{- include "slurm.imagePullSecrets" . | nindent 6 }} {{- if .Values.accounting.enabled }} securityContext: diff --git a/helm/slurm/templates/login/login-deployment.yaml b/helm/slurm/templates/login/login-deployment.yaml new file mode 100644 index 00000000..4ae4b9c0 --- /dev/null +++ b/helm/slurm/templates/login/login-deployment.yaml @@ -0,0 +1,105 @@ +{{- /* +SPDX-FileCopyrightText: Copyright (C) SchedMD LLC. 
+SPDX-License-Identifier: Apache-2.0 +*/}} + +{{- if .Values.login.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "slurm.login.name" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "slurm.login.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.login.replicas | default 1 }} + selector: + matchLabels: + {{- include "slurm.login.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: login + labels: + {{- include "slurm.login.labels" . | nindent 8 }} + spec: + priorityClassName: {{ .Values.login.priorityClassName | default .Values.priorityClassName }} + automountServiceAccountToken: false + enableServiceLinks: false + dnsConfig: + {{- include "slurm.dnsConfig" . | nindent 8 }} + initContainers: + - name: init + image: {{ include "slurm.authcred.imageRef" . }} + imagePullPolicy: {{ .Values.authcred.imagePullPolicy | default (include "slurm.imagePullPolicy" .) }} + {{- with .Values.authcred.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }}{{- /* with .Values.authcred.resources */}} + env: + - name: SLURM_USER + value: {{ include "slurm.user" . }} + command: + - bash + - -c + - | + {{- range .Files.Lines "scripts/init.sh" }} + {{ . }} + {{- end }}{{- /* range .Files.Lines "scripts/init.sh" */}} + volumeMounts: + {{- include "slurm.init.volumeMounts" . | nindent 12 }} + containers: + - name: login + image: {{ include "slurm.login.imageRef" . }} + imagePullPolicy: {{ .Values.login.imagePullPolicy | default (include "slurm.imagePullPolicy" .) }} + {{- with .Values.login.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + env: + - name: SACKD_OPTIONS + value: -D --conf-server={{- printf "%s:%s" (include "slurm.controller.name" .) (include "slurm.controller.port" .) }} + ports: + - name: ssh + containerPort: 22 + {{- with .Values.login.resources }} + resources: + {{- toYaml . 
| nindent 12 }} + {{- end }}{{- /* with .Values.login.resources */}} + readinessProbe: + exec: + command: + - test + - -S + - /run/slurm/sack.socket + volumeMounts: + {{- include "slurm.volumeMounts" . | nindent 12 }} + - name: authsocket + mountPath: /run/slurm + {{- with .Values.login.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }}{{- /* with .Values.login.extraVolumeMounts */}} + {{- with .Values.login.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }}{{- /* with .Values.login.nodeSelector */}} + {{- with .Values.login.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }}{{- /* with .Values.login.affinity */}} + {{- with .Values.login.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }}{{- /* with .Values.login.tolerations */}} + volumes: + {{- include "slurm.volumes" . | nindent 8 }} + - name: slurm-config + projected: + defaultMode: 0600 + sources: + - secret: + name: {{ include "slurm.auth.secretName" . }} + {{- with .Values.login.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }}{{- /* with .Values.login.extraVolumes */}} +{{- end }}{{- /* if .Values.login.enabled */}} diff --git a/helm/slurm/templates/login/login-service.yaml b/helm/slurm/templates/login/login-service.yaml new file mode 100644 index 00000000..5e44e18b --- /dev/null +++ b/helm/slurm/templates/login/login-service.yaml @@ -0,0 +1,18 @@ +{{- if .Values.login.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: slurm-login + namespace: {{ .Release.Namespace }} + labels: + {{- include "slurm.login.labels" . | nindent 4 }} +spec: + type: ClusterIP + selector: + {{- include "slurm.login.selectorLabels" . 
| nindent 4 }} + ports: + - name: ssh + port: 22 + targetPort: ssh + protocol: TCP +{{- end }}{{- /* if .Values.login.enabled */}} diff --git a/helm/slurm/templates/restapi/restapi-deployment.yaml b/helm/slurm/templates/restapi/restapi-deployment.yaml index 7e006b9a..19bb5b90 100644 --- a/helm/slurm/templates/restapi/restapi-deployment.yaml +++ b/helm/slurm/templates/restapi/restapi-deployment.yaml @@ -38,6 +38,10 @@ spec: affinity: {{- .Values.restapi.affinity | toYaml | nindent 8 }} {{- end }}{{- /* if .Values.restapi.affinity */}} + {{- if .Values.restapi.tolerations }} + tolerations: + {{- .Values.restapi.tolerations | toYaml | nindent 8 }} + {{- end }}{{- /* if .Values.restapi.tolerations */}} {{- include "slurm.imagePullSecrets" . | nindent 6 }} initContainers: - name: init diff --git a/helm/slurm/values.yaml b/helm/slurm/values.yaml index d8baf60c..e90d5496 100644 --- a/helm/slurm/values.yaml +++ b/helm/slurm/values.yaml @@ -196,6 +196,11 @@ controller: # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity affinity: {} # + # -- (list) + # Configure pod tolerations. + # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ + tolerations: [] + # # -- (object) # Set container resource requests and limits for Kubernetes Pod scheduling. # Ref: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-requests-and-limits-of-pod-and-container @@ -243,6 +248,95 @@ controller: # matchLabels: # app: foo +# +# Login node configurations. +login: + # + # -- (bool) + # Enables login nodes. + enabled: true + # + # -- (integer) + # Set the number of replicas to deploy. + replicas: 1 + # + # -- (string) + # Set the image pull policy. + imagePullPolicy: IfNotPresent + # + # Set the image to use. + image: + # + # -- (string) + # Set the image repository to use. + repository: ghcr.io/slinkyproject/login + # + # -- (string) + # Set the image tag to use. 
+ tag: 25.05-ubuntu24.04 + # + # -- (object) + # The security context given to the container. + # Ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container + securityContext: + privileged: false + # capabilities: + # add: + # - SYS_CHROOT + # + # --(list) + # List of volume mounts. + # Ref: https://kubernetes.io/docs/concepts/storage/volumes/ + extraVolumeMounts: [] + # - name: nfs-home + # mountPath: /home + # - name: nfs-data + # mountPath: /mnt/data + # + # --(list) + # Define list of pod volumes. + # Ref: https://kubernetes.io/docs/concepts/storage/volumes/ + extraVolumes: [] + # - name: nfs-home + # nfs: + # server: nfs-server.example.com + # path: /exports/home/ + # - name: nfs-data + # persistentVolumeClaim: + # claimName: nfs-data + # + # -- (string) + # Set the priority class to use. + # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption/#priorityclass + priorityClassName: "" + # + # -- (map) + # Selector which must match a node's labels for the pod to be scheduled on that node. + # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector + nodeSelector: + kubernetes.io/os: linux + # + # -- (object) + # Set affinity for Kubernetes Pod scheduling. + # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity + affinity: {} + # + # -- (list) + # Configure pod tolerations. + # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ + tolerations: [] + # + # -- (object) + # Set container resource requests and limits for Kubernetes Pod scheduling. + # Ref: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-requests-and-limits-of-pod-and-container + resources: {} + # requests: + # cpu: 1 + # memory: 1Gi + # limits: + # cpu: 2 + # memory: 4Gi + # # Slurm compute (slurmd) configurations. 
compute: @@ -314,6 +408,10 @@ compute: cpu: 1 memory: 1Gi # + # -- (string) + # Set the size limit of the memory-backed volume mounted at `/dev/shm` in each nodeset pod. + shmSize: 16Gi + # # -- (map) # Selector which must match a node's labels for the pod to be scheduled on that node. nodeSelector: @@ -383,6 +481,17 @@ compute: # `Delete` policy causes those PVCs to be deleted. whenDeleted: Retain # + # Mount existing persistent volume claims into the nodeset pods. + persistence: + # + # -- (list) + # List of existing `PersistentVolumeClaim`s to mount; each entry gives the claim `name` and the container `mountPath`. + existingDataClaims: [] + # - name: data-cpu-pv + # mountPath: /data + # - name: scratch-gpu-pv + # mountPath: /scratch + # # --(list) # List of claims that pods are allowed to reference. # The NodeSet controller is responsible for mapping network identities to @@ -487,6 +596,11 @@ accounting: # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity affinity: {} # + # -- (list) + # Configure pod tolerations. + # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ + tolerations: [] + # # -- (object) # Set container resource requests and limits for Kubernetes Pod scheduling. # Ref: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-requests-and-limits-of-pod-and-container @@ -585,6 +699,11 @@ restapi: # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity affinity: {} # + # -- (list) + # Configure pod tolerations. + # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ + tolerations: [] + # # -- (object) # Set container resource requests and limits for Kubernetes Pod scheduling. 
# Ref: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-requests-and-limits-of-pod-and-container diff --git a/internal/annotations/node.go b/internal/annotations/node.go index fbc4c726..c1c17ad1 100644 --- a/internal/annotations/node.go +++ b/internal/annotations/node.go @@ -9,4 +9,5 @@ const ( // Note that this is honored on a best-effort basis, and so it does not // offer guarantees on Node scheduling order. NodeWeight = "slinky.slurm.net/node-weight" + NodeCordon = "slinky.slurm.net/node-cordon" ) diff --git a/internal/controller/cluster/cluster_control.go b/internal/controller/cluster/cluster_control.go index fcf6c7ea..661ac135 100644 --- a/internal/controller/cluster/cluster_control.go +++ b/internal/controller/cluster/cluster_control.go @@ -22,10 +22,10 @@ import ( "github.com/SlinkyProject/slurm-client/pkg/object" slurmtypes "github.com/SlinkyProject/slurm-client/pkg/types" - slinkyv1alpha1 "github.com/SlinkyProject/slurm-operator/api/v1alpha1" - nodesetcontroller "github.com/SlinkyProject/slurm-operator/internal/controller/nodeset" - "github.com/SlinkyProject/slurm-operator/internal/resources" - "github.com/SlinkyProject/slurm-operator/internal/utils" + slinkyv1alpha1 "github.com/togethercomputer/slurm-operator/api/v1alpha1" + nodesetcontroller "github.com/togethercomputer/slurm-operator/internal/controller/nodeset" + "github.com/togethercomputer/slurm-operator/internal/resources" + "github.com/togethercomputer/slurm-operator/internal/utils" ) // ClusterControl implements the control logic for synchronizing Clusters and their children Pods. 
It is implemented diff --git a/internal/controller/cluster/cluster_controller.go b/internal/controller/cluster/cluster_controller.go index 016db9fd..193b93e0 100644 --- a/internal/controller/cluster/cluster_controller.go +++ b/internal/controller/cluster/cluster_controller.go @@ -25,9 +25,9 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" - slinkyv1alpha1 "github.com/SlinkyProject/slurm-operator/api/v1alpha1" - "github.com/SlinkyProject/slurm-operator/internal/resources" - "github.com/SlinkyProject/slurm-operator/internal/utils/durationstore" + slinkyv1alpha1 "github.com/togethercomputer/slurm-operator/api/v1alpha1" + "github.com/togethercomputer/slurm-operator/internal/resources" + "github.com/togethercomputer/slurm-operator/internal/utils/durationstore" ) const ( diff --git a/internal/controller/cluster/cluster_controller_test.go b/internal/controller/cluster/cluster_controller_test.go index 322eb866..0bb2b7a3 100644 --- a/internal/controller/cluster/cluster_controller_test.go +++ b/internal/controller/cluster/cluster_controller_test.go @@ -16,7 +16,7 @@ import ( "github.com/SlinkyProject/slurm-client/pkg/object" slurmtypes "github.com/SlinkyProject/slurm-client/pkg/types" - slinkyv1alpha1 "github.com/SlinkyProject/slurm-operator/api/v1alpha1" + slinkyv1alpha1 "github.com/togethercomputer/slurm-operator/api/v1alpha1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" diff --git a/internal/controller/cluster/cluster_status_updater.go b/internal/controller/cluster/cluster_status_updater.go index 60fe12b5..5a50874b 100644 --- a/internal/controller/cluster/cluster_status_updater.go +++ b/internal/controller/cluster/cluster_status_updater.go @@ -14,7 +14,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" - slinkyv1alpha1 "github.com/SlinkyProject/slurm-operator/api/v1alpha1" + slinkyv1alpha1 
"github.com/togethercomputer/slurm-operator/api/v1alpha1" ) // ClusterStatusUpdaterInterface is an interface used to update the ClusterStatus associated with a StatefulSet. diff --git a/internal/controller/cluster/suite_test.go b/internal/controller/cluster/suite_test.go index f9f4c238..024e5b52 100644 --- a/internal/controller/cluster/suite_test.go +++ b/internal/controller/cluster/suite_test.go @@ -24,8 +24,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log/zap" "sigs.k8s.io/controller-runtime/pkg/metrics/server" - slinkyv1alpha1 "github.com/SlinkyProject/slurm-operator/api/v1alpha1" - "github.com/SlinkyProject/slurm-operator/internal/resources" + slinkyv1alpha1 "github.com/togethercomputer/slurm-operator/api/v1alpha1" + "github.com/togethercomputer/slurm-operator/internal/resources" //+kubebuilder:scaffold:imports ) diff --git a/internal/controller/nodeset/nodeset_control.go b/internal/controller/nodeset/nodeset_control.go index a22af7d4..f1634f46 100644 --- a/internal/controller/nodeset/nodeset_control.go +++ b/internal/controller/nodeset/nodeset_control.go @@ -31,10 +31,11 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" - slinkyv1alpha1 "github.com/SlinkyProject/slurm-operator/api/v1alpha1" - "github.com/SlinkyProject/slurm-operator/internal/errors" - "github.com/SlinkyProject/slurm-operator/internal/resources" - "github.com/SlinkyProject/slurm-operator/internal/utils" + slinkyv1alpha1 "github.com/togethercomputer/slurm-operator/api/v1alpha1" + "github.com/togethercomputer/slurm-operator/internal/annotations" + "github.com/togethercomputer/slurm-operator/internal/errors" + "github.com/togethercomputer/slurm-operator/internal/resources" + "github.com/togethercomputer/slurm-operator/internal/utils" ) // NodeSetControl implements the control logic for synchronizing NodeSets and their children Pods. 
It is implemented @@ -426,8 +427,16 @@ func (nsc *defaultNodeSetControl) processNodeSetPod( } } + // Fetch the Node so its cordon annotation can be checked. Nodes are cluster-scoped, so the key carries no namespace. + // NOTE(review): assumes the pod hostname equals the Kubernetes Node name; confirm. + node := &corev1.Node{} + if err := nsc.Get(ctx, client.ObjectKey{Name: pods[i].Spec.Hostname}, node); err != nil { + return err + } + stateMatch := true - if isNodeSetPodCordon(pods[i]) || nsc.podControl.isNodeSetPodDrain(ctx, set, pods[i]) { + drained := nsc.podControl.isNodeSetPodDrain(ctx, set, pods[i]) + // Indexing a nil map yields the zero value, so no nil check is needed. + annotation := node.Annotations[annotations.NodeCordon] == "true" + // The state does not match when the pod is cordoned, drained, or its Node is annotated as cordoned. + if isNodeSetPodCordon(pods[i]) || drained || annotation { stateMatch = false } diff --git a/internal/controller/nodeset/nodeset_controller.go b/internal/controller/nodeset/nodeset_controller.go index 9caf4118..831edede 100644 --- a/internal/controller/nodeset/nodeset_controller.go +++ b/internal/controller/nodeset/nodeset_controller.go @@ -23,9 +23,9 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/source" - slinkyv1alpha1 "github.com/SlinkyProject/slurm-operator/api/v1alpha1" - "github.com/SlinkyProject/slurm-operator/internal/resources" - "github.com/SlinkyProject/slurm-operator/internal/utils/durationstore" + slinkyv1alpha1 "github.com/togethercomputer/slurm-operator/api/v1alpha1" + "github.com/togethercomputer/slurm-operator/internal/resources" + "github.com/togethercomputer/slurm-operator/internal/utils/durationstore" ) const ( diff --git a/internal/controller/nodeset/nodeset_controller_test.go b/internal/controller/nodeset/nodeset_controller_test.go index 1b4a781d..91cf7989 100644 --- a/internal/controller/nodeset/nodeset_controller_test.go +++ b/internal/controller/nodeset/nodeset_controller_test.go @@ -23,7 +23,7 @@ import ( slurmtypes "github.com/SlinkyProject/slurm-client/pkg/types" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" - slinkyv1alpha1 "github.com/SlinkyProject/slurm-operator/api/v1alpha1" + slinkyv1alpha1 
"github.com/togethercomputer/slurm-operator/api/v1alpha1" ) func newFakeClientList(interceptorFuncs interceptor.Funcs, initObjLists ...object.ObjectList) slurmclient.Client { diff --git a/internal/controller/nodeset/nodeset_event_handler.go b/internal/controller/nodeset/nodeset_event_handler.go index 9a9a2e81..ffee57d7 100644 --- a/internal/controller/nodeset/nodeset_event_handler.go +++ b/internal/controller/nodeset/nodeset_event_handler.go @@ -24,8 +24,8 @@ import ( slurmclient "github.com/SlinkyProject/slurm-client/pkg/client" slurmtypes "github.com/SlinkyProject/slurm-client/pkg/types" - slinkyv1alpha1 "github.com/SlinkyProject/slurm-operator/api/v1alpha1" - "github.com/SlinkyProject/slurm-operator/internal/utils" + slinkyv1alpha1 "github.com/togethercomputer/slurm-operator/api/v1alpha1" + "github.com/togethercomputer/slurm-operator/internal/utils" ) var _ handler.EventHandler = &podEventHandler{} diff --git a/internal/controller/nodeset/nodeset_history.go b/internal/controller/nodeset/nodeset_history.go index f4a953a3..919a9899 100644 --- a/internal/controller/nodeset/nodeset_history.go +++ b/internal/controller/nodeset/nodeset_history.go @@ -21,7 +21,7 @@ import ( "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" - slinkyv1alpha1 "github.com/SlinkyProject/slurm-operator/api/v1alpha1" + slinkyv1alpha1 "github.com/togethercomputer/slurm-operator/api/v1alpha1" ) type realHistory struct { diff --git a/internal/controller/nodeset/nodeset_pod_control.go b/internal/controller/nodeset/nodeset_pod_control.go index e039f9ec..742813d6 100644 --- a/internal/controller/nodeset/nodeset_pod_control.go +++ b/internal/controller/nodeset/nodeset_pod_control.go @@ -26,10 +26,10 @@ import ( "github.com/SlinkyProject/slurm-client/pkg/object" slurmtypes "github.com/SlinkyProject/slurm-client/pkg/types" - slinkyv1alpha1 "github.com/SlinkyProject/slurm-operator/api/v1alpha1" - "github.com/SlinkyProject/slurm-operator/internal/annotations" - 
"github.com/SlinkyProject/slurm-operator/internal/errors" - "github.com/SlinkyProject/slurm-operator/internal/resources" + slinkyv1alpha1 "github.com/togethercomputer/slurm-operator/api/v1alpha1" + "github.com/togethercomputer/slurm-operator/internal/annotations" + "github.com/togethercomputer/slurm-operator/internal/errors" + "github.com/togethercomputer/slurm-operator/internal/resources" ) // NodeSetPodControlObjectManager abstracts the manipulation of Pods and PVCs. The real controller implements this @@ -193,35 +193,60 @@ func (spc *NodeSetPodControl) updateSlurmNode( delete(pod.Annotations, annotations.PodCordon) err := spc.Update(ctx, pod) spc.recordPodEvent("uncordon", set, pod, err) - return err - } - if spc.isNodeSetPodDrain(ctx, set, pod) { - clusterName := types.NamespacedName{ - Namespace: set.GetNamespace(), - Name: set.Spec.ClusterName, + if err != nil { + return err } - slurmClient := spc.slurmClusters.Get(clusterName) - if slurmClient != nil && !isNodeSetPodDelete(pod) { - objectKey := object.ObjectKey(pod.Spec.Hostname) - slurmNode := &slurmtypes.Node{} - if err := slurmClient.Get(ctx, objectKey, slurmNode); err != nil { - if err.Error() == http.StatusText(http.StatusNotFound) { - return nil - } - return err + + if spc.isNodeSetPodDrain(ctx, set, pod) { + clusterName := types.NamespacedName{ + Namespace: set.GetNamespace(), + Name: set.Spec.ClusterName, } + slurmClient := spc.slurmClusters.Get(clusterName) + if slurmClient != nil && !isNodeSetPodDelete(pod) { + objectKey := object.ObjectKey(pod.Spec.Hostname) + slurmNode := &slurmtypes.Node{} + if err := slurmClient.Get(ctx, objectKey, slurmNode); err != nil { + if err.Error() == http.StatusText(http.StatusNotFound) { + return nil + } + return err + } - logger.Info("Undrain Slurm Node", "slurmNode", slurmNode, "Pod", pod) - slurmNode.State.Insert(slurmtypes.NodeStateUNDRAIN) - if err := slurmClient.Update(ctx, slurmNode); err != nil { - if err.Error() == http.StatusText(http.StatusNotFound) { - 
return nil + logger.Info("Undrain Slurm Node", "slurmNode", slurmNode, "Pod", pod) + slurmNode.State.Insert(slurmtypes.NodeStateUNDRAIN) + if err := slurmClient.Update(ctx, slurmNode); err != nil { + if err.Error() == http.StatusText(http.StatusNotFound) { + return nil + } + return err } - return err } + } + } else { + // Nodes are cluster-scoped, so the lookup key carries no namespace. + // NOTE(review): assumes the pod hostname equals the Kubernetes Node name; confirm. + node := &corev1.Node{} + if err := spc.Get(ctx, client.ObjectKey{Name: pod.Spec.Hostname}, node); err != nil { + return err + } - return nil + // Mirror the Slurm drain state onto the Node's cordon annotation. + cordon := "false" + if spc.isNodeSetPodDrain(ctx, set, pod) { + logger.Info("Node is drained, cordoning node") + cordon = "true" + } else { + logger.Info("Node is not drained, un-cordoning node") + } + + // Write only when the annotation value changes, so a steady-state reconcile issues no Node update. + if node.Annotations[annotations.NodeCordon] != cordon { + if node.Annotations == nil { + node.Annotations = make(map[string]string) + } + node.Annotations[annotations.NodeCordon] = cordon + if err := spc.Update(ctx, node); err != nil { + return err + } } } diff --git a/internal/controller/nodeset/nodeset_status_updater.go b/internal/controller/nodeset/nodeset_status_updater.go index bdad5099..0bcb2bfb 100644 --- a/internal/controller/nodeset/nodeset_status_updater.go +++ b/internal/controller/nodeset/nodeset_status_updater.go @@ -13,8 +13,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" - slinkyv1alpha1 "github.com/SlinkyProject/slurm-operator/api/v1alpha1" - "github.com/SlinkyProject/slurm-operator/internal/utils" + slinkyv1alpha1 "github.com/togethercomputer/slurm-operator/api/v1alpha1" + "github.com/togethercomputer/slurm-operator/internal/utils" ) // NodeSetStatusUpdaterInterface is an interface used to update the NodeSetStatus associated with a StatefulSet. 
diff --git a/internal/controller/nodeset/nodeset_update.go b/internal/controller/nodeset/nodeset_update.go index 413aff3b..ae8e4825 100644 --- a/internal/controller/nodeset/nodeset_update.go +++ b/internal/controller/nodeset/nodeset_update.go @@ -27,9 +27,9 @@ import ( "github.com/SlinkyProject/slurm-client/pkg/object" slurmtypes "github.com/SlinkyProject/slurm-client/pkg/types" - slinkyv1alpha1 "github.com/SlinkyProject/slurm-operator/api/v1alpha1" - "github.com/SlinkyProject/slurm-operator/internal/annotations" - "github.com/SlinkyProject/slurm-operator/internal/utils" + slinkyv1alpha1 "github.com/togethercomputer/slurm-operator/api/v1alpha1" + "github.com/togethercomputer/slurm-operator/internal/annotations" + "github.com/togethercomputer/slurm-operator/internal/utils" ) // updatedDesiredNodeCounts calculates the true number of allowed unavailable or surge pods and diff --git a/internal/controller/nodeset/nodeset_utils.go b/internal/controller/nodeset/nodeset_utils.go index a6d84a8f..5c2b5047 100644 --- a/internal/controller/nodeset/nodeset_utils.go +++ b/internal/controller/nodeset/nodeset_utils.go @@ -28,9 +28,9 @@ import ( "k8s.io/utils/integer" "k8s.io/utils/ptr" - slinkyv1alpha1 "github.com/SlinkyProject/slurm-operator/api/v1alpha1" - "github.com/SlinkyProject/slurm-operator/internal/annotations" - "github.com/SlinkyProject/slurm-operator/internal/utils" + slinkyv1alpha1 "github.com/togethercomputer/slurm-operator/api/v1alpha1" + "github.com/togethercomputer/slurm-operator/internal/annotations" + "github.com/togethercomputer/slurm-operator/internal/utils" ) // var patchCodec = scheme.Codecs.LegacyCodec(slinkyv1alpha1.SchemeGroupVersion) @@ -361,6 +361,16 @@ func newNodeSetPod(set *slinkyv1alpha1.NodeSet, nodeName, hash string) *corev1.P // Added default tolerations for NodeSet pods, pinning Pod to Node by nodeName. 
util.AddOrUpdateDaemonPodTolerations(&pod.Spec) + + // Remove unschedulable toleration + tolerations := pod.Spec.Tolerations + var filteredTolerations []corev1.Toleration + for _, toleration := range tolerations { + if toleration.Key != corev1.TaintNodeUnschedulable { + filteredTolerations = append(filteredTolerations, toleration) + } + } + pod.Spec.Tolerations = filteredTolerations // The pod's NodeAffinity will be updated to make sure the Pod is bound // to the target node by default scheduler. It is safe to do so because there diff --git a/internal/controller/nodeset/nodeset_utils_test.go b/internal/controller/nodeset/nodeset_utils_test.go index adf29d7d..92fbc51a 100644 --- a/internal/controller/nodeset/nodeset_utils_test.go +++ b/internal/controller/nodeset/nodeset_utils_test.go @@ -8,8 +8,8 @@ import ( "reflect" "testing" - slinkyv1alpha1 "github.com/SlinkyProject/slurm-operator/api/v1alpha1" - "github.com/SlinkyProject/slurm-operator/internal/annotations" + slinkyv1alpha1 "github.com/togethercomputer/slurm-operator/api/v1alpha1" + "github.com/togethercomputer/slurm-operator/internal/annotations" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" diff --git a/internal/controller/nodeset/suite_test.go b/internal/controller/nodeset/suite_test.go index cdc48c10..51459369 100644 --- a/internal/controller/nodeset/suite_test.go +++ b/internal/controller/nodeset/suite_test.go @@ -24,8 +24,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log/zap" "sigs.k8s.io/controller-runtime/pkg/metrics/server" - slinkyv1alpha1 "github.com/SlinkyProject/slurm-operator/api/v1alpha1" - "github.com/SlinkyProject/slurm-operator/internal/resources" + slinkyv1alpha1 "github.com/togethercomputer/slurm-operator/api/v1alpha1" + "github.com/togethercomputer/slurm-operator/internal/resources" //+kubebuilder:scaffold:imports ) diff --git a/internal/utils/meta_test.go b/internal/utils/meta_test.go index 074b2ed1..ff3c34a5 100644 --- 
a/internal/utils/meta_test.go +++ b/internal/utils/meta_test.go @@ -8,7 +8,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - slinkyv1alpha1 "github.com/SlinkyProject/slurm-operator/api/v1alpha1" + slinkyv1alpha1 "github.com/togethercomputer/slurm-operator/api/v1alpha1" ) func TestKeyFunc(t *testing.T) { diff --git a/internal/utils/node.go b/internal/utils/node.go index 4eb17dc4..c3b08029 100644 --- a/internal/utils/node.go +++ b/internal/utils/node.go @@ -6,7 +6,7 @@ package utils import ( corev1 "k8s.io/api/core/v1" - "github.com/SlinkyProject/slurm-operator/internal/annotations" + "github.com/togethercomputer/slurm-operator/internal/annotations" ) type NodeByWeight []*corev1.Node diff --git a/internal/utils/node_test.go b/internal/utils/node_test.go index f4a39b3c..0da015a7 100644 --- a/internal/utils/node_test.go +++ b/internal/utils/node_test.go @@ -6,7 +6,7 @@ package utils import ( "testing" - "github.com/SlinkyProject/slurm-operator/internal/annotations" + "github.com/togethercomputer/slurm-operator/internal/annotations" corev1 "k8s.io/api/core/v1" ) diff --git a/internal/utils/pod.go b/internal/utils/pod.go index 47cf696f..40f83557 100644 --- a/internal/utils/pod.go +++ b/internal/utils/pod.go @@ -8,7 +8,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" podutil "k8s.io/kubernetes/pkg/api/v1/pod" - "github.com/SlinkyProject/slurm-operator/internal/annotations" + "github.com/togethercomputer/slurm-operator/internal/annotations" ) type PodByCreationTimestampAndPhase []*corev1.Pod diff --git a/internal/utils/pod_test.go b/internal/utils/pod_test.go index 4300d464..bb1da88a 100644 --- a/internal/utils/pod_test.go +++ b/internal/utils/pod_test.go @@ -7,7 +7,7 @@ import ( "testing" "time" - "github.com/SlinkyProject/slurm-operator/internal/annotations" + "github.com/togethercomputer/slurm-operator/internal/annotations" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) diff --git a/slurm-operator b/slurm-operator 
deleted file mode 160000 index 16755942..00000000 --- a/slurm-operator +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 167559423e50f9123cda1dd8a2be1edd9893b3f1