pablomh · pablomh · Apr 1, 2026 · Apr 4, 2026 · Apr 4, 2026 · Apr 4, 2026
diff --git a/Gemfile b/Gemfile
@@ -3,6 +3,7 @@ source 'https://rubygems.org'
 gemspec
 
 gem 'concurrent-ruby', '~> 1.0', require: 'concurrent'
+gem 'redis', require: false # optional: shared script cache for LB capsule deployments
 
 Dir[File.join(__dir__, 'bundler.d', '*.rb')].each do |bundle|
   eval_gemfile(bundle)

diff --git a/config/settings.d/registration.yml.example b/config/settings.d/registration.yml.example
@@ -1,3 +1,17 @@
 ---
 :enabled: false
 # :registration_url: https://loadbalancer.example.com
+
+# Optional: shared Redis cache for the registration script, useful when
+# multiple capsule nodes sit behind a load balancer. One warm request
+# on any node benefits all nodes. Falls back to per-node in-memory cache
+# if unset or if Redis is unreachable. Requires the 'redis' gem.
+# :cache_url: redis://localhost:6379/0
+
+# Optional: maximum number of concurrent POST /register requests forwarded to
+# Foreman. When all permits are taken the capsule returns 503 + Retry-After:30
+# immediately. The orchestration layer (Ansible retry_failed, wave batching)
+# handles rescheduling — no in-capsule wait queue is used.
+# Default: unset (unlimited). Size this relative to Foreman's Rails thread pool
+# and database connection pool (typically 5-20 threads/connections).
+# :max_concurrent_registrations: 50
diff --git a/modules/registration/proxy_request.rb b/modules/registration/proxy_request.rb
@@ -33,6 +33,15 @@ def host_register(request)
       send_request(proxy_req)
     end
 
+    def foreman_reachable?
+      proxy_req = request_factory.create_get '/api/status', {}, {}
+      send_request(proxy_req)
+      true
+    rescue => e
+      logger.debug "Foreman reachability check failed: #{e.class}: #{e.message}"
+      false
+    end
+
     private
 
     def request_params(request)

diff --git a/modules/registration/registration_api.rb b/modules/registration/registration_api.rb
@@ -1,29 +1,201 @@
 require 'registration/proxy_request'
 
 class Proxy::Registration::Api < ::Sinatra::Base
+  # Cache for the global registration script (GET /register).
+  #
+  # The script is identical for all hosts sharing the same registration
+  # parameters (org, location, hostgroup, activation keys), making it safe
+  # to serve from an in-memory cache during concurrent bulk registration.
+  #
+  # Per-key double-checked locking prevents thundering herd while allowing
+  # genuinely independent cache keys (e.g. different activation keys) to
+  # fetch from Foreman in parallel.
+  #
+  # Only HTTP 200 responses are cached — errors are raised out of the cache
+  # block so they never poison the cache. The per-key mutex is evicted
+  # immediately after caching so KEY_MUTEXES stays bounded to keys currently
+  # being fetched for the first time (zero under steady state).
+  REGISTRATION_SCRIPT_CACHE_TTL = 5 * 60 # seconds
+  KEY_MUTEXES                   = Concurrent::Map.new
+  SCRIPT_CACHE                  = Concurrent::Map.new
+
+  class ScriptFetchError < StandardError
+    attr_reader :response
+
+    def initialize(response)
+      super()
+      @response = response
+    end
+  end
+
+  class << self
+    def registration_script_cache
+      SCRIPT_CACHE
+    end
+
+    def key_mutex(cache_key)
+      KEY_MUTEXES.compute_if_absent(cache_key) { Mutex.new }
+    end
+
+    def evict_key_mutex(cache_key)
+      KEY_MUTEXES.delete(cache_key)
+    end
+
+    # Returns a Redis client when :cache_url is configured, nil otherwise.
+    # Lazy-initialised; falls back to nil on LoadError (gem not installed)
+    # or connection error, so in-memory cache remains the fallback.
+    # Returns a Concurrent::Semaphore when :max_concurrent_registrations is set,
+    # nil otherwise (unlimited). Lazy-initialised; the semaphore persists for
+    # the lifetime of the process so the permit count is shared across requests.
+    def registration_semaphore
+      return @registration_semaphore if instance_variable_defined?(:@registration_semaphore)
+
+      limit = Proxy::Registration::Plugin.settings.max_concurrent_registrations
+      @registration_semaphore = limit ? Concurrent::Semaphore.new(limit.to_i) : nil
+    end
+
+    def registration_cache_client
+      return @registration_cache_client if instance_variable_defined?(:@registration_cache_client)
+
+      cache_url = Proxy::Registration::Plugin.settings.cache_url
+      @registration_cache_client = if cache_url
+                        require 'redis'
+                        Redis.new(url: cache_url)
+                      end
+    rescue LoadError
+      @registration_cache_client = nil
+    rescue => e
+      ::Proxy::Log.logger.warn "Registration: Redis init failed (#{e.class}: #{e.message}); using local cache"
+      @registration_cache_client = nil
+    end
+  end
+
+  get '/health' do
+    content_type :json
+    if Proxy::Registration::ProxyRequest.new.foreman_reachable?
+      { status: 'ok' }.to_json
+    else
+      status 503
+      { status: 'error', message: 'Foreman is unreachable' }.to_json
+    end
+  rescue StandardError => e
+    logger.exception 'Error during health check', e
+    status 503
+    content_type :json
+    { status: 'error', message: 'Health check failed' }.to_json
+  end
+
   get '/' do
-    response = Proxy::Registration::ProxyRequest.new.global_register(request)
-    handle_response(response)
+    registration_script
+  rescue ScriptFetchError => e
+    handle_response(e.response)
   rescue StandardError => e
     logger.exception "Error when rendering Global Registration Template", e
     render_error(default_error_msg)
   end
 
   post '/' do
-    response = Proxy::Registration::ProxyRequest.new.host_register(request)
-    handle_response(response)
+    with_concurrency_limit do
+      resp = Proxy::Registration::ProxyRequest.new.host_register(request)
+      handle_response(resp)
+    end
   rescue StandardError => e
     logger.exception "Error when rendering Host Registration Template", e
     render_error(default_error_msg)
   end
 
   private
 
+  # Runs the block only if a concurrency permit is available.
+  # Returns the block's value on success.
+  # Halts with 503 + Retry-After without yielding when all permits are taken.
+  #
+  # NOTE: do not call Sinatra's `halt` inside the block — `halt` raises
+  # Sinatra::HaltResponse which bypasses `ensure`, leaking a permit.
+  # Use `status` + explicit `return` instead.
+  def with_concurrency_limit
+    semaphore = self.class.registration_semaphore
+    if semaphore && !semaphore.try_acquire
+      logger.warn "registration_concurrency limit=#{semaphore.count} status=rejected"
+      response.headers['Retry-After'] = '30'
+      halt 503, "echo \"Registration queue full, please retry later\"\nexit 1\n"
+    end
+    begin
+      yield
+    ensure
+      semaphore&.release
+    end
+  end
+
+  def registration_script
+    cache_key = Rack::Utils.build_query(
+      Rack::Utils.parse_nested_query(request.query_string).sort_by { |k, _| k }
+    )
+    cache(cache_key) do
+      response = Proxy::Registration::ProxyRequest.new.global_register(request)
+      raise ScriptFetchError, response unless response.code == '200'
+      response.body
+    end
+  end
+
+  def cache(key, &block)
+    value = read_registration_cache(key)
+    return value if value
+
+    self.class.key_mutex(key).synchronize do
+      value = read_registration_cache(key)
+      return value if value
+
+      result = yield
+
+      # Write to Redis first (shared across all capsule nodes in the LB pool)
+      if (redis = self.class.registration_cache_client)
+        begin
+          redis.setex(key, REGISTRATION_SCRIPT_CACHE_TTL, result)
+        rescue => e
+          logger.warn "registration_script Redis write failed: #{e.message}"
+        end
+      end
+
+      # Always write to local in-memory cache (fallback and fast path)
+      self.class.registration_script_cache[key] = { body: result, at: Time.now }
+      self.class.evict_key_mutex(key)
+      result
+    end
+  end
+
+  def read_registration_cache(cache_key)
+    # Check Redis first — a hit here means another node already fetched the
+    # script, so we serve it without going to Foreman and also warm the
+    # local cache for subsequent requests to this node.
+    if (redis = self.class.registration_cache_client)
+      begin
+        cached = redis.get(cache_key)
+        if cached
+          logger.debug "registration_script cache=HIT source=shared key_prefix=#{cache_key[0, 40]}"
+          self.class.registration_script_cache[cache_key] = { body: cached, at: Time.now }
+          return cached
+        end
+      rescue => e
+        logger.warn "registration_script Redis read failed, falling back to local cache: #{e.message}"
+      end
+    end
+
+    # Fall back to per-node in-memory cache
+    entry = self.class.registration_script_cache[cache_key]
+    if entry && (Time.now - entry[:at]) < REGISTRATION_SCRIPT_CACHE_TTL
+      logger.debug "registration_script cache=HIT source=local age=#{(Time.now - entry[:at]).to_i}s key_prefix=#{cache_key[0, 40]}"
+      entry[:body]
+    else
+      logger.debug "registration_script cache=MISS key_prefix=#{cache_key[0, 40]}"
+      nil
+    end
+  end
+
   def handle_response(response)
-    if response.code.start_with? '2'
+    if response.code.start_with?('2')
       response.body
     else
-      # Return error message only if it is not HTML.
       message = response["content-type"].include?('text/plain') ? response.body : default_error_msg
       render_error(message, code: response.code)
     end

diff --git a/modules/registration/registration_plugin.rb b/modules/registration/registration_plugin.rb
@@ -12,5 +12,21 @@ class Plugin < ::Proxy::Plugin
 
     validate :registration_url, optional_url: true
     expose_setting :registration_url
+
+    # Optional Redis URL for sharing the registration script cache across
+    # multiple capsule nodes in an LB pool. When set, all nodes read from
+    # and write to the same Redis instance so a single warm request benefits
+    # every node. Falls back to per-node in-memory cache if unset or if
+    # Redis is unreachable. Requires the 'redis' gem to be installed.
+    # Example: redis://lb-host:6379/0
+    validate :cache_url, optional_url: true
+
+    # Optional limit on simultaneous POST /register requests forwarded to
+    # Foreman. When all permits are taken, the capsule immediately returns
+    # 503 with Retry-After: 30 so the client can back off. The orchestration
+    # layer (Ansible retry_failed, satperf wave batching) handles rescheduling.
+    # Default: unset (unlimited). Tune based on Foreman's Rails thread pool
+    # and database connection pool size.
+    # Example: 50 (permits ~50 concurrent host record creations)
   end
 end