diff --git a/Gemfile b/Gemfile index 8e9ed747c..b20e8d20f 100644 --- a/Gemfile +++ b/Gemfile @@ -3,6 +3,7 @@ source 'https://rubygems.org' gemspec gem 'concurrent-ruby', '~> 1.0', require: 'concurrent' +gem 'redis', require: false # optional: shared script cache for LB capsule deployments Dir[File.join(__dir__, 'bundler.d', '*.rb')].each do |bundle| eval_gemfile(bundle) diff --git a/config/settings.d/registration.yml.example b/config/settings.d/registration.yml.example index 2385095ad..1af4ca46c 100644 --- a/config/settings.d/registration.yml.example +++ b/config/settings.d/registration.yml.example @@ -1,3 +1,17 @@ --- :enabled: false # :registration_url: https://loadbalancer.example.com + +# Optional: shared Redis cache for the registration script, useful when +# multiple capsule nodes sit behind a load balancer. One warm request +# on any node benefits all nodes. Falls back to per-node in-memory cache +# if unset or if Redis is unreachable. Requires the 'redis' gem. +# :cache_url: redis://localhost:6379/0 + +# Optional: maximum number of concurrent POST /register requests forwarded to +# Foreman. When all permits are taken the capsule returns 503 + Retry-After:30 +# immediately. The orchestration layer (Ansible retry_failed, wave batching) +# handles rescheduling — no in-capsule wait queue is used. +# Default: unset (unlimited). Size this relative to Foreman's Rails thread pool +# and database connection pool (typically 5-20 threads/connections). +# :max_concurrent_registrations: 50 diff --git a/modules/registration/proxy_request.rb b/modules/registration/proxy_request.rb index 6ad9e99ee..70f0226a0 100644 --- a/modules/registration/proxy_request.rb +++ b/modules/registration/proxy_request.rb @@ -33,6 +33,15 @@ def host_register(request) send_request(proxy_req) end + def foreman_reachable? + proxy_req = request_factory.create_get '/api/status', {}, {} + send_request(proxy_req) + true + rescue => e + logger.debug "Foreman reachability check failed: #{e.class}: #{e.message}" + false + end + private def request_params(request) diff --git a/modules/registration/registration_api.rb b/modules/registration/registration_api.rb index c4ad22273..4d86d0686 100644 --- a/modules/registration/registration_api.rb +++ b/modules/registration/registration_api.rb @@ -1,17 +1,104 @@ require 'registration/proxy_request' class Proxy::Registration::Api < ::Sinatra::Base + # Cache for the global registration script (GET /register). + # + # The script is identical for all hosts sharing the same registration + # parameters (org, location, hostgroup, activation keys), making it safe + # to serve from an in-memory cache during concurrent bulk registration. + # + # Per-key double-checked locking prevents thundering herd while allowing + # genuinely independent cache keys (e.g. different activation keys) to + # fetch from Foreman in parallel. + # + # Only HTTP 200 responses are cached — errors are raised out of the cache + # block so they never poison the cache. The per-key mutex is evicted + # immediately after caching so KEY_MUTEXES stays bounded to keys currently + # being fetched for the first time (zero under steady state). + REGISTRATION_SCRIPT_CACHE_TTL = 5 * 60 # seconds + KEY_MUTEXES = Concurrent::Map.new + SCRIPT_CACHE = Concurrent::Map.new + + class ScriptFetchError < StandardError + attr_reader :response + + def initialize(response) + super() + @response = response + end + end + + class << self + def registration_script_cache + SCRIPT_CACHE + end + + def key_mutex(cache_key) + KEY_MUTEXES.compute_if_absent(cache_key) { Mutex.new } + end + + def evict_key_mutex(cache_key) + KEY_MUTEXES.delete(cache_key) + end + + # Returns a Redis client when :cache_url is configured, nil otherwise. + # Lazy-initialised; falls back to nil on LoadError (gem not installed) + # or connection error, so in-memory cache remains the fallback. + # Returns a Concurrent::Semaphore when :max_concurrent_registrations is set, + # nil otherwise (unlimited). Lazy-initialised; the semaphore persists for + # the lifetime of the process so the permit count is shared across requests. + def registration_semaphore + return @registration_semaphore if instance_variable_defined?(:@registration_semaphore) + + limit = Proxy::Registration::Plugin.settings.max_concurrent_registrations + @registration_semaphore = limit ? Concurrent::Semaphore.new(limit.to_i) : nil + end + + def registration_cache_client + return @registration_cache_client if instance_variable_defined?(:@registration_cache_client) + + cache_url = Proxy::Registration::Plugin.settings.cache_url + @registration_cache_client = if cache_url + require 'redis' + Redis.new(url: cache_url) + end + rescue LoadError + @registration_cache_client = nil + rescue => e + ::Proxy::Log.logger.warn "Registration: Redis init failed (#{e.class}: #{e.message}); using local cache" + @registration_cache_client = nil + end + end + + get '/health' do + content_type :json + if Proxy::Registration::ProxyRequest.new.foreman_reachable? + { status: 'ok' }.to_json + else + status 503 + { status: 'error', message: 'Foreman is unreachable' }.to_json + end + rescue StandardError => e + logger.exception 'Error during health check', e + status 503 + content_type :json + { status: 'error', message: 'Health check failed' }.to_json + end + get '/' do - response = Proxy::Registration::ProxyRequest.new.global_register(request) - handle_response(response) + registration_script + rescue ScriptFetchError => e + handle_response(e.response) rescue StandardError => e logger.exception "Error when rendering Global Registration Template", e render_error(default_error_msg) end post '/' do - response = Proxy::Registration::ProxyRequest.new.host_register(request) - handle_response(response) + with_concurrency_limit do + resp = Proxy::Registration::ProxyRequest.new.host_register(request) + handle_response(resp) + end rescue StandardError => e logger.exception "Error when rendering Host Registration Template", e render_error(default_error_msg) @@ -19,11 +106,96 @@ class Proxy::Registration::Api < ::Sinatra::Base private + # Runs the block only if a concurrency permit is available. + # Returns the block's value on success. + # Halts with 503 + Retry-After without yielding when all permits are taken. + # + # NOTE: do not call Sinatra's `halt` inside the block — `halt` raises + # Sinatra::HaltResponse which bypasses `ensure`, leaking a permit. + # Use `status` + explicit `return` instead. + def with_concurrency_limit + semaphore = self.class.registration_semaphore + if semaphore && !semaphore.try_acquire + logger.warn "registration_concurrency limit=#{semaphore.count} status=rejected" + response.headers['Retry-After'] = '30' + halt 503, "echo \"Registration queue full, please retry later\"\nexit 1\n" + end + begin + yield + ensure + semaphore&.release + end + end + + def registration_script + cache_key = Rack::Utils.build_query( + Rack::Utils.parse_nested_query(request.query_string).sort_by { |k, _| k } + ) + cache(cache_key) do + response = Proxy::Registration::ProxyRequest.new.global_register(request) + raise ScriptFetchError, response unless response.code == '200' + response.body + end + end + + def cache(key, &block) + value = read_registration_cache(key) + return value if value + + self.class.key_mutex(key).synchronize do + value = read_registration_cache(key) + return value if value + + result = yield + + # Write to Redis first (shared across all capsule nodes in the LB pool) + if (redis = self.class.registration_cache_client) + begin + redis.setex(key, REGISTRATION_SCRIPT_CACHE_TTL, result) + rescue => e + logger.warn "registration_script Redis write failed: #{e.message}" + end + end + + # Always write to local in-memory cache (fallback and fast path) + self.class.registration_script_cache[key] = { body: result, at: Time.now } + self.class.evict_key_mutex(key) + result + end + end + + def read_registration_cache(cache_key) + # Check Redis first — a hit here means another node already fetched the + # script, so we serve it without going to Foreman and also warm the + # local cache for subsequent requests to this node. + if (redis = self.class.registration_cache_client) + begin + cached = redis.get(cache_key) + if cached + logger.debug "registration_script cache=HIT source=shared key_prefix=#{cache_key[0, 40]}" + self.class.registration_script_cache[cache_key] = { body: cached, at: Time.now } + return cached + end + rescue => e + logger.warn "registration_script Redis read failed, falling back to local cache: #{e.message}" + end + end + + # Fall back to per-node in-memory cache + entry = self.class.registration_script_cache[cache_key] + if entry && (Time.now - entry[:at]) < REGISTRATION_SCRIPT_CACHE_TTL + logger.debug "registration_script cache=HIT source=local age=#{(Time.now - entry[:at]).to_i}s key_prefix=#{cache_key[0, 40]}" + entry[:body] + else + logger.debug "registration_script cache=MISS key_prefix=#{cache_key[0, 40]}" + nil + end + end + def handle_response(response) - if response.code.start_with? '2' + if response.code.start_with?('2') response.body else - # Return error message only if it is not HTML. message = response["content-type"].include?('text/plain') ? response.body : default_error_msg render_error(message, code: response.code) end diff --git a/modules/registration/registration_plugin.rb b/modules/registration/registration_plugin.rb index 3d16a84f0..30fe950ea 100644 --- a/modules/registration/registration_plugin.rb +++ b/modules/registration/registration_plugin.rb @@ -12,5 +12,21 @@ class Plugin < ::Proxy::Plugin validate :registration_url, optional_url: true expose_setting :registration_url + + # Optional Redis URL for sharing the registration script cache across + # multiple capsule nodes in an LB pool. When set, all nodes read from + # and write to the same Redis instance so a single warm request benefits + # every node. Falls back to per-node in-memory cache if unset or if + # Redis is unreachable. Requires the 'redis' gem to be installed. + # Example: redis://lb-host:6379/0 + validate :cache_url, optional_url: true + + # Optional limit on simultaneous POST /register requests forwarded to + # Foreman. When all permits are taken, the capsule immediately returns + # 503 with Retry-After: 30 so the client can back off. The orchestration + # layer (Ansible retry_failed, satperf wave batching) handles rescheduling. + # Default: unset (unlimited). Tune based on Foreman's Rails thread pool + # and database connection pool size. + # Example: 50 (permits ~50 concurrent host record creations) end end diff --git a/test/registration/registration_api_test.rb b/test/registration/registration_api_test.rb index bd9ff7b1b..5a63c447f 100644 --- a/test/registration/registration_api_test.rb +++ b/test/registration/registration_api_test.rb @@ -11,6 +11,13 @@ def app def setup @foreman_url = 'http://foreman.example.com' Proxy::SETTINGS.stubs(:foreman_url).returns(@foreman_url) + Proxy::Registration::Plugin.settings.stubs(:max_concurrent_registrations).returns(nil) + Proxy::Registration::Plugin.settings.stubs(:cache_url).returns(nil) + # Clear class-level state between tests to prevent cross-test contamination + Proxy::Registration::Api.registration_script_cache.clear + Proxy::Registration::Api::KEY_MUTEXES.clear + Proxy::Registration::Api.instance_variable_set(:@registration_semaphore, nil) + Proxy::Registration::Api.instance_variable_set(:@registration_cache_client, nil) end def test_global_register_template @@ -102,6 +109,86 @@ def test_global_500 assert_match("echo \"Internal Server Error\"\nexit 1\n", last_response.body) end + def test_global_register_caches_response + stub = stub_request(:get, "#{@foreman_url}/register").to_return(body: 'template') + + 2.times do + get '/' + assert last_response.ok? + assert_match('template', last_response.body) + end + + assert_requested stub, times: 1 + end + + def test_global_register_cache_key_is_parameter_order_independent + # Cache key is normalised (params sorted alphabetically), so both orderings + # produce activation_keys=rhel9&owner=Default_Organization and share one entry. + stub_request(:get, "#{@foreman_url}/register?activation_keys=rhel9&owner=Default_Organization") + .to_return(body: 'template') + + get '/', { owner: 'Default_Organization', activation_keys: 'rhel9' } + assert last_response.ok? + + # Different parameter order — must hit cache, not Foreman again + get '/', { activation_keys: 'rhel9', owner: 'Default_Organization' } + assert last_response.ok? + + assert_requested :get, "#{@foreman_url}/register?activation_keys=rhel9&owner=Default_Organization", times: 1 + end + + def test_global_register_caches_per_key + stub_a = stub_request(:get, "#{@foreman_url}/register?key=a").to_return(body: 'template_a') + stub_b = stub_request(:get, "#{@foreman_url}/register?key=b").to_return(body: 'template_b') + + get '/', { key: 'a' } + assert_match('template_a', last_response.body) + get '/', { key: 'b' } + assert_match('template_b', last_response.body) + # second requests — must be served from cache + get '/', { key: 'a' } + assert_match('template_a', last_response.body) + get '/', { key: 'b' } + assert_match('template_b', last_response.body) + + assert_requested stub_a, times: 1 + assert_requested stub_b, times: 1 + end + + def test_global_register_evicts_mutex_after_caching + stub_request(:get, "#{@foreman_url}/register").to_return(body: 'template') + + get '/' + assert last_response.ok? + assert_empty Proxy::Registration::Api::KEY_MUTEXES + end + + def test_global_register_cache_entry_expires_after_ttl + Proxy::Registration::Api.registration_script_cache[''] = { + body: 'stale-template', + at: Time.now - Proxy::Registration::Api::REGISTRATION_SCRIPT_CACHE_TTL - 1, + } + stub = stub_request(:get, "#{@foreman_url}/register").to_return(body: 'fresh-template') + + get '/' + assert last_response.ok? + assert_match('fresh-template', last_response.body) + assert_requested stub, times: 1 + end + + def test_global_register_does_not_cache_errors + stub = stub_request(:get, "#{@foreman_url}/register").to_return( + body: 'error', status: 500, headers: { "Content-Type" => 'text/plain' } + ) + + 2.times do + get '/' + assert last_response.server_error? + end + + assert_requested stub, times: 2 + end + def test_host_500 Rack::NullLogger.any_instance.stubs(:exception) stub_request(:post, "#{@foreman_url}/register").to_timeout @@ -110,4 +197,144 @@ def test_host_500 assert last_response.server_error? assert_match("echo \"Internal Server Error\"\nexit 1\n", last_response.body) end + + # --------------------------------------------------------------------------- + # GET /health + # --------------------------------------------------------------------------- + + def test_health_returns_ok_when_foreman_reachable + Proxy::Registration::ProxyRequest.any_instance.stubs(:foreman_reachable?).returns(true) + + get '/health' + assert last_response.ok? + assert_equal 'application/json', last_response.content_type + assert_match('"status":"ok"', last_response.body) + end + + def test_health_returns_503_when_foreman_unreachable + Proxy::Registration::ProxyRequest.any_instance.stubs(:foreman_reachable?).returns(false) + + get '/health' + assert_equal 503, last_response.status + assert_match('"status":"error"', last_response.body) + end + + def test_health_returns_503_on_unexpected_error + Rack::NullLogger.any_instance.stubs(:exception) + Proxy::Registration::ProxyRequest.any_instance.stubs(:foreman_reachable?).raises(StandardError, 'boom') + + get '/health' + assert_equal 503, last_response.status + assert_match('"status":"error"', last_response.body) + end + + # --------------------------------------------------------------------------- + # Concurrency limiter (:max_concurrent_registrations) + # --------------------------------------------------------------------------- + + def test_host_register_proceeds_when_within_limit + Proxy::Registration::Plugin.settings.stubs(:max_concurrent_registrations).returns(2) + stub_request(:post, "#{@foreman_url}/register").to_return(body: 'template') + + post '/' + assert last_response.ok? + assert_match('template', last_response.body) + end + + def test_host_register_returns_503_when_limit_exhausted + Proxy::Registration::Plugin.settings.stubs(:max_concurrent_registrations).returns(1) + # Exhaust the one available permit before the request arrives + Proxy::Registration::Api.registration_semaphore.try_acquire + + post '/' + assert_equal 503, last_response.status + assert_equal '30', last_response.headers['Retry-After'] + assert_match('retry', last_response.body) + end + + def test_host_register_releases_permit_after_success + Proxy::Registration::Plugin.settings.stubs(:max_concurrent_registrations).returns(1) + stub_request(:post, "#{@foreman_url}/register").to_return(body: 'template') + + post '/' + assert last_response.ok? + # Permit must be released — a second request should also succeed + post '/' + assert last_response.ok? + end + + def test_host_register_releases_permit_after_error + Rack::NullLogger.any_instance.stubs(:exception) + Proxy::Registration::Plugin.settings.stubs(:max_concurrent_registrations).returns(1) + stub_request(:post, "#{@foreman_url}/register").to_timeout + + post '/' + assert last_response.server_error? + # with_concurrency_limit's ensure block must release permit even on error + assert_equal 1, Proxy::Registration::Api.registration_semaphore.available_permits + end + + def test_host_register_unlimited_when_setting_absent + # Default (nil) means no semaphore — unlimited concurrency + assert_nil Proxy::Registration::Api.registration_semaphore + stub_request(:post, "#{@foreman_url}/register").to_return(body: 'template') + + post '/' + assert last_response.ok? + end + + # --------------------------------------------------------------------------- + # Shared Redis cache (:cache_url) + # --------------------------------------------------------------------------- + + def test_global_register_serves_from_shared_cache_on_hit + redis = mock('redis') + redis.stubs(:get).returns('cached-template') + Proxy::Registration::Api.instance_variable_set(:@registration_cache_client, redis) + stub = stub_request(:get, "#{@foreman_url}/register").to_return(body: 'fresh') + + get '/' + assert last_response.ok? + assert_match('cached-template', last_response.body) + assert_not_requested stub + end + + def test_global_register_writes_to_shared_cache_on_miss + redis = mock('redis') + redis.stubs(:get).returns(nil) + redis.expects(:setex).with(anything, Proxy::Registration::Api::REGISTRATION_SCRIPT_CACHE_TTL, 'template') + Proxy::Registration::Api.instance_variable_set(:@registration_cache_client, redis) + stub_request(:get, "#{@foreman_url}/register").to_return(body: 'template') + + get '/' + assert last_response.ok? + assert_match('template', last_response.body) + end + + def test_global_register_falls_back_to_local_cache_on_redis_read_error + Rack::NullLogger.any_instance.stubs(:warn) + redis = mock('redis') + redis.stubs(:get).raises(StandardError, 'connection refused') + Proxy::Registration::Api.instance_variable_set(:@registration_cache_client, redis) + stub_request(:get, "#{@foreman_url}/register").to_return(body: 'template') + + get '/' + assert last_response.ok? + assert_match('template', last_response.body) + end + + def test_global_register_continues_on_redis_write_error + Rack::NullLogger.any_instance.stubs(:warn) + redis = mock('redis') + redis.stubs(:get).returns(nil) + redis.stubs(:setex).raises(StandardError, 'write error') + Proxy::Registration::Api.instance_variable_set(:@registration_cache_client, redis) + stub_request(:get, "#{@foreman_url}/register").to_return(body: 'template') + + get '/' + assert last_response.ok? + assert_match('template', last_response.body) + # Local cache must still be populated as fallback + assert_not_nil Proxy::Registration::Api.registration_script_cache[''] + end end