Skip to content

Commit 2c2878b

Browse files
committed
Fix HTTP server not starting when there is no leader
Signed-off-by: Markus Blaschke <[email protected]>
1 parent 809ec03 commit 2c2878b

File tree

2 files changed

+71
-61
lines changed

2 files changed

+71
-61
lines changed

autopilot/main.go

Lines changed: 70 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -178,76 +178,86 @@ func (r *AzureK8sAutopilot) initMetricsUpdate() {
178178
prometheus.MustRegister(r.prometheus.update.duration)
179179
}
180180

181-
func (r *AzureK8sAutopilot) Run() {
182-
r.leaderElect()
183-
log.Infof("starting cluster check loop")
181+
func (r *AzureK8sAutopilot) Start() {
182+
go func() {
183+
r.leaderElect()
184+
log.Infof("starting autopilot")
185+
186+
if r.Config.Repair.Crontab != "" {
187+
r.startAutopilotRepair()
188+
}
189+
190+
if r.Config.Update.Crontab != "" {
191+
r.startAutopilotUpdate()
192+
}
193+
}()
194+
}
195+
196+
func (r *AzureK8sAutopilot) startAutopilotRepair() {
184197
// repair job
185-
if r.Config.Repair.Crontab != "" {
186-
r.cron.repair = cron.New(
187-
cron.WithChain(
188-
cron.SkipIfStillRunning(
189-
cron.PrintfLogger(
190-
log.StandardLogger(),
191-
),
198+
r.cron.repair = cron.New(
199+
cron.WithChain(
200+
cron.SkipIfStillRunning(
201+
cron.PrintfLogger(
202+
log.StandardLogger(),
192203
),
193204
),
194-
)
195-
196-
_, err := r.cron.repair.AddFunc(r.Config.Repair.Crontab, func() {
197-
contextLogger := log.WithField("job", "repair")
198-
199-
// concurrency repair limit
200-
if r.Config.Repair.Limit > 0 && r.nodeRepairLock.ItemCount() >= r.Config.Repair.Limit {
201-
contextLogger.Infof("concurrent repair limit reached, skipping run")
202-
} else {
203-
start := time.Now()
204-
contextLogger.Infoln("starting repair check")
205-
r.repairRun(contextLogger)
206-
runtime := time.Now().Sub(start)
207-
r.prometheus.repair.duration.WithLabelValues().Set(runtime.Seconds())
208-
contextLogger.WithField("duration", runtime.String()).Infof("finished after %s", runtime.String())
209-
}
210-
})
211-
if err != nil {
212-
log.Panic(err)
213-
}
205+
),
206+
)
214207

215-
r.cron.repair.Start()
208+
_, err := r.cron.repair.AddFunc(r.Config.Repair.Crontab, func() {
209+
contextLogger := log.WithField("job", "repair")
210+
211+
// concurrency repair limit
212+
if r.Config.Repair.Limit > 0 && r.nodeRepairLock.ItemCount() >= r.Config.Repair.Limit {
213+
contextLogger.Infof("concurrent repair limit reached, skipping run")
214+
} else {
215+
start := time.Now()
216+
contextLogger.Infoln("starting repair check")
217+
r.repairRun(contextLogger)
218+
runtime := time.Now().Sub(start)
219+
r.prometheus.repair.duration.WithLabelValues().Set(runtime.Seconds())
220+
contextLogger.WithField("duration", runtime.String()).Infof("finished after %s", runtime.String())
221+
}
222+
})
223+
if err != nil {
224+
log.Panic(err)
216225
}
217226

218-
// upgrade job
219-
if r.Config.Update.Crontab != "" {
220-
r.cron.update = cron.New(
221-
cron.WithChain(
222-
cron.SkipIfStillRunning(
223-
cron.PrintfLogger(
224-
log.StandardLogger(),
225-
),
227+
r.cron.repair.Start()
228+
}
229+
230+
func (r *AzureK8sAutopilot) startAutopilotUpdate() {
231+
r.cron.update = cron.New(
232+
cron.WithChain(
233+
cron.SkipIfStillRunning(
234+
cron.PrintfLogger(
235+
log.StandardLogger(),
226236
),
227237
),
228-
)
229-
230-
_, err := r.cron.update.AddFunc(r.Config.Update.Crontab, func() {
231-
contextLogger := log.WithField("job", "update")
232-
233-
// concurrency repair limit
234-
if r.Config.Update.Limit > 0 && r.nodeUpdateLock.ItemCount() >= r.Config.Update.Limit {
235-
contextLogger.Infof("concurrent update limit reached, skipping run")
236-
} else {
237-
contextLogger.Infoln("starting update check")
238-
start := time.Now()
239-
r.updateRun(contextLogger)
240-
runtime := time.Now().Sub(start)
241-
r.prometheus.update.duration.WithLabelValues().Set(runtime.Seconds())
242-
contextLogger.WithField("duration", runtime.String()).Infof("finished after %s", runtime.String())
243-
}
244-
})
245-
if err != nil {
246-
log.Panic(err)
247-
}
238+
),
239+
)
248240

249-
r.cron.update.Start()
241+
_, err := r.cron.update.AddFunc(r.Config.Update.Crontab, func() {
242+
contextLogger := log.WithField("job", "update")
243+
244+
// concurrency update limit
245+
if r.Config.Update.Limit > 0 && r.nodeUpdateLock.ItemCount() >= r.Config.Update.Limit {
246+
contextLogger.Infof("concurrent update limit reached, skipping run")
247+
} else {
248+
contextLogger.Infoln("starting update check")
249+
start := time.Now()
250+
r.updateRun(contextLogger)
251+
runtime := time.Now().Sub(start)
252+
r.prometheus.update.duration.WithLabelValues().Set(runtime.Seconds())
253+
contextLogger.WithField("duration", runtime.String()).Infof("finished after %s", runtime.String())
254+
}
255+
})
256+
if err != nil {
257+
log.Panic(err)
250258
}
259+
260+
r.cron.update.Start()
251261
}
252262

253263
func (r *AzureK8sAutopilot) leaderElect() {

main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ func main() {
3838
Config: opts,
3939
}
4040
autorepair.Init()
41-
autorepair.Run()
41+
autorepair.Start()
4242

4343
log.Infof("starting http server on %s", opts.ServerBind)
4444
startHttpServer()

0 commit comments

Comments
 (0)