diff --git a/.github/workflows/goreleaser.yml b/.github/workflows/goreleaser.yml index aa94eb8..b7ff667 100644 --- a/.github/workflows/goreleaser.yml +++ b/.github/workflows/goreleaser.yml @@ -15,7 +15,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v4 with: - go-version: "^1.22" + go-version: "^1.23" - name: Run GoReleaser uses: goreleaser/goreleaser-action@v5 with: @@ -23,4 +23,4 @@ jobs: version: latest args: release --clean env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 354c588..ef41c5f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,7 +10,7 @@ jobs: strategy: matrix: go-version: - - 1.22.x + - 1.23.x steps: - name: checkout uses: actions/checkout@v4 @@ -21,4 +21,4 @@ jobs: - name: vet run: go vet ./... - name: test - run: go test -v -race ./... \ No newline at end of file + run: go test -v -race ./... diff --git a/README.md b/README.md index 9a8f430..1f9ea73 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,9 @@ groups: # How often should we talk to Hetzner to look for changes w.r.t. the infra itself? poll_interval: 60s + # How long to wait after applying a plan before allowing another plan. Default is 0 (disabled). Hetzner keeps floating IPs locked for a short while after assigning them, so a small delay (e.g. 1s) prevents a plan that happens right after from failing. + post_plan_delay: 1s + hetzner: api_token: "abc123" # Your Hetzner API token. project_id: 123456 # Your Hetzner's project ID (you can find it in the URL in the Hetzner dashboard). diff --git a/config/cfgmodel/model.go b/config/cfgmodel/model.go index 797c055..4cbaeef 100644 --- a/config/cfgmodel/model.go +++ b/config/cfgmodel/model.go @@ -30,6 +30,11 @@ type GroupConfig struct { // This defaults to 30 seconds. PlanApplyTimeout time.Duration `koanf:"plan_apply_timeout"` + // PostPlanDelay is the time to wait after a plan is applied before allowing another plan. + // This is useful to prevent plans from happening in quick succession. 
+ // This defaults to 0 (disabled). + PostPlanDelay time.Duration `koanf:"post_plan_delay"` + // PlanApplyWithUnknownStatus is a flag that indicates that the group should apply plans // even if the status of one or more servers is unknown. PlanApplyWithUnkownStatus bool `koanf:"plan_apply_with_unknown_status"` @@ -90,6 +95,11 @@ func (c GroupConfig) PlanApplyTimeoutOrDefault() time.Duration { return c.PlanApplyTimeout } +// PostPlanDelayOrDefault returns the post-plan delay (default is 0, disabled). +func (c GroupConfig) PostPlanDelayOrDefault() time.Duration { + return c.PostPlanDelay +} + // ServiceConfig describes the service. The name is used in metrics and logs. type ServiceConfig struct { Name string `koanf:"name"` diff --git a/go.mod b/go.mod index f1dda38..312bcba 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/gzuidhof/flipper -go 1.22 +go 1.23 require github.com/go-ozzo/ozzo-validation/v4 v4.3.0 diff --git a/monitor/group.go b/monitor/group.go index 2bafab4..58f9d2d 100644 --- a/monitor/group.go +++ b/monitor/group.go @@ -91,17 +91,20 @@ func (g *Group) executePlan(ctx context.Context, logger *slog.Logger, state plan ) } - if g.provider.Name() == "hetzner" { - // Hetzner keeps the floating IPs locked for a short while after assigning them. - // So we add a small sleep here to prevent a potential plan that happens right after from failing. - // This is a bit of a hack, but it's the simplest solution for now. A potential future solution - // could be to retry on lock errors (within the Hetzner provider perhaps). - time.Sleep(time.Second) - } - return nil } +func (g *Group) applyPostPlanDelay(ctx context.Context) { + delay := g.cfg.PostPlanDelayOrDefault() + if delay <= 0 { + return + } + select { + case <-ctx.Done(): + case <-time.After(delay): + } +} + // Start watching the resources and performing health checks. // This function blocks until the context is cancelled. 
func (g *Group) Start(ctx context.Context) error { @@ -201,6 +204,8 @@ func (g *Group) Start(ctx context.Context) error { _ = g.notifier.Notify(ctx, msg) logger.InfoContext(ctx, "Plan executed successfully.", slog.Int("num_unhealthy_servers", numUnhealthy)) + + g.applyPostPlanDelay(ctx) } minSequence = g.watcher.performUpdate(ctx, updateChan, errChan, true)