@@ -178,76 +178,86 @@ func (r *AzureK8sAutopilot) initMetricsUpdate() {
178178 prometheus .MustRegister (r .prometheus .update .duration )
179179}
180180
181- func (r * AzureK8sAutopilot ) Run () {
182- r .leaderElect ()
183- log .Infof ("starting cluster check loop" )
181+ func (r * AzureK8sAutopilot ) Start () {
182+ go func () {
183+ r .leaderElect ()
184+ log .Infof ("starting autopilot" )
185+
186+ if r .Config .Repair .Crontab != "" {
187+ r .startAutopilotRepair ()
188+ }
189+
190+ if r .Config .Update .Crontab != "" {
191+ r .startAutopilotUpdate ()
192+ }
193+ }()
194+ }
195+
196+ func (r * AzureK8sAutopilot ) startAutopilotRepair () {
184197 // repair job
185- if r .Config .Repair .Crontab != "" {
186- r .cron .repair = cron .New (
187- cron .WithChain (
188- cron .SkipIfStillRunning (
189- cron .PrintfLogger (
190- log .StandardLogger (),
191- ),
198+ r .cron .repair = cron .New (
199+ cron .WithChain (
200+ cron .SkipIfStillRunning (
201+ cron .PrintfLogger (
202+ log .StandardLogger (),
192203 ),
193204 ),
194- )
195-
196- _ , err := r .cron .repair .AddFunc (r .Config .Repair .Crontab , func () {
197- contextLogger := log .WithField ("job" , "repair" )
198-
199- // concurrency repair limit
200- if r .Config .Repair .Limit > 0 && r .nodeRepairLock .ItemCount () >= r .Config .Repair .Limit {
201- contextLogger .Infof ("concurrent repair limit reached, skipping run" )
202- } else {
203- start := time .Now ()
204- contextLogger .Infoln ("starting repair check" )
205- r .repairRun (contextLogger )
206- runtime := time .Now ().Sub (start )
207- r .prometheus .repair .duration .WithLabelValues ().Set (runtime .Seconds ())
208- contextLogger .WithField ("duration" , runtime .String ()).Infof ("finished after %s" , runtime .String ())
209- }
210- })
211- if err != nil {
212- log .Panic (err )
213- }
205+ ),
206+ )
214207
215- r .cron .repair .Start ()
208+ _ , err := r .cron .repair .AddFunc (r .Config .Repair .Crontab , func () {
209+ contextLogger := log .WithField ("job" , "repair" )
210+
211+ // concurrency repair limit
212+ if r .Config .Repair .Limit > 0 && r .nodeRepairLock .ItemCount () >= r .Config .Repair .Limit {
213+ contextLogger .Infof ("concurrent repair limit reached, skipping run" )
214+ } else {
215+ start := time .Now ()
216+ contextLogger .Infoln ("starting repair check" )
217+ r .repairRun (contextLogger )
218+ runtime := time .Now ().Sub (start )
219+ r .prometheus .repair .duration .WithLabelValues ().Set (runtime .Seconds ())
220+ contextLogger .WithField ("duration" , runtime .String ()).Infof ("finished after %s" , runtime .String ())
221+ }
222+ })
223+ if err != nil {
224+ log .Panic (err )
216225 }
217226
218- // upgrade job
219- if r .Config .Update .Crontab != "" {
220- r .cron .update = cron .New (
221- cron .WithChain (
222- cron .SkipIfStillRunning (
223- cron .PrintfLogger (
224- log .StandardLogger (),
225- ),
227+ r .cron .repair .Start ()
228+ }
229+
230+ func (r * AzureK8sAutopilot ) startAutopilotUpdate () {
231+ r .cron .update = cron .New (
232+ cron .WithChain (
233+ cron .SkipIfStillRunning (
234+ cron .PrintfLogger (
235+ log .StandardLogger (),
226236 ),
227237 ),
228- )
229-
230- _ , err := r .cron .update .AddFunc (r .Config .Update .Crontab , func () {
231- contextLogger := log .WithField ("job" , "update" )
232-
233- // concurrency repair limit
234- if r .Config .Update .Limit > 0 && r .nodeUpdateLock .ItemCount () >= r .Config .Update .Limit {
235- contextLogger .Infof ("concurrent update limit reached, skipping run" )
236- } else {
237- contextLogger .Infoln ("starting update check" )
238- start := time .Now ()
239- r .updateRun (contextLogger )
240- runtime := time .Now ().Sub (start )
241- r .prometheus .update .duration .WithLabelValues ().Set (runtime .Seconds ())
242- contextLogger .WithField ("duration" , runtime .String ()).Infof ("finished after %s" , runtime .String ())
243- }
244- })
245- if err != nil {
246- log .Panic (err )
247- }
238+ ),
239+ )
248240
249- r .cron .update .Start ()
241+ _ , err := r .cron .update .AddFunc (r .Config .Update .Crontab , func () {
242+ contextLogger := log .WithField ("job" , "update" )
243+
244+ // concurrency repair limit
245+ if r .Config .Update .Limit > 0 && r .nodeUpdateLock .ItemCount () >= r .Config .Update .Limit {
246+ contextLogger .Infof ("concurrent update limit reached, skipping run" )
247+ } else {
248+ contextLogger .Infoln ("starting update check" )
249+ start := time .Now ()
250+ r .updateRun (contextLogger )
251+ runtime := time .Now ().Sub (start )
252+ r .prometheus .update .duration .WithLabelValues ().Set (runtime .Seconds ())
253+ contextLogger .WithField ("duration" , runtime .String ()).Infof ("finished after %s" , runtime .String ())
254+ }
255+ })
256+ if err != nil {
257+ log .Panic (err )
250258 }
259+
260+ r .cron .update .Start ()
251261}
252262
253263func (r * AzureK8sAutopilot ) leaderElect () {
0 commit comments