@@ -17,32 +17,38 @@ import (
1717
1818 api "flux-framework/flux-operator/api/v1alpha1"
1919
20+ ctrl "sigs.k8s.io/controller-runtime"
2021 jobset "sigs.k8s.io/jobset/api/v1alpha1"
2122)
2223
2324func (r * MiniClusterReconciler ) newJobSet (
2425 cluster * api.MiniCluster ,
2526) (* jobset.JobSet , error ) {
2627
27- suspend := true
28+ // NOTE: it is not yet clear how suspend works here; it appears to prevent
29+ // any pods from being created, so suspending may be a bad idea.
30+ suspend := false
2831 jobs := jobset.JobSet {
2932 ObjectMeta : metav1.ObjectMeta {
30- Name : cluster . Name ,
33+ Name : "minicluster" ,
3134 Namespace : cluster .Namespace ,
3235 Labels : cluster .Spec .JobLabels ,
3336 },
3437 Spec : jobset.JobSetSpec {
3538
3639 // Suspend child jobs (the worker pods) when broker finishes
40+ // How do I define a child job?
3741 Suspend : & suspend ,
3842 // TODO decide on FailurePolicy here
3943 // default is to fail if all jobs in jobset fail
4044 },
4145 }
4246
4347 // Get leader broker job, the parent in the JobSet (worker or follower pods)
48+ // Both are required to be in indexed completion mode to have a service!
49+ // I'm not sure that totally makes sense, but ok!
4450 // cluster, size, entrypoint, indexed
45- leaderJob , err := r .getJob (cluster , 1 , "broker" , false )
51+ leaderJob , err := r .getJob (cluster , 1 , "broker" , true )
4652 if err != nil {
4753 return & jobs , err
4854 }
@@ -51,10 +57,11 @@ func (r *MiniClusterReconciler) newJobSet(
5157 return & jobs , err
5258 }
5359 jobs .Spec .ReplicatedJobs = []jobset.ReplicatedJob {leaderJob , workerJob }
60+ ctrl .SetControllerReference (cluster , & jobs , r .Scheme )
5461 return & jobs , nil
5562}
5663
57- // getBrokerJob creates the job for the main leader broker
64+ // getJob creates a job for the main leader (broker) or the workers (followers)
5865func (r * MiniClusterReconciler ) getJob (
5966 cluster * api.MiniCluster ,
6067 size int32 ,
@@ -64,14 +71,13 @@ func (r *MiniClusterReconciler) getJob(
6471
6572 backoffLimit := int32 (100 )
6673 podLabels := r .getPodLabels (cluster )
67- enableDNSHostnames := true
74+ enableDNSHostnames := false
6875 completionMode := batchv1 .NonIndexedCompletion
6976
7077 if indexed {
7178 completionMode = batchv1 .IndexedCompletion
7279 }
7380
74- // TODO how are these named
7581 job := jobset.ReplicatedJob {
7682 Name : cluster .Name + "-" + entrypoint ,
7783
@@ -110,7 +116,7 @@ func (r *MiniClusterReconciler) getJob(
110116 },
111117 Spec : corev1.PodSpec {
112118 // matches the service
113- // Subdomain: restfulServiceName,
119+ Subdomain : restfulServiceName ,
114120 Volumes : getVolumes (cluster , entrypoint ),
115121 RestartPolicy : corev1 .RestartPolicyOnFailure ,
116122 ImagePullSecrets : getImagePullSecrets (cluster ),
@@ -130,7 +136,12 @@ func (r *MiniClusterReconciler) getJob(
130136
131137 // Get volume mounts, add on container specific ones
132138 mounts := getVolumeMounts (cluster )
133- containers , err := r .getContainers (cluster .Spec .Containers , cluster .Name , mounts )
139+ containers , err := r .getContainers (
140+ cluster .Spec .Containers ,
141+ cluster .Name ,
142+ mounts ,
143+ entrypoint ,
144+ )
134145 jobspec .Template .Spec .Containers = containers
135146 job .Template .Spec = jobspec
136147 return job , err
0 commit comments