Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ source 'https://rubygems.org'
gemspec

gem 'concurrent-ruby', '~> 1.0', require: 'concurrent'
gem 'redis', require: false # optional: shared script cache for LB capsule deployments

Dir[File.join(__dir__, 'bundler.d', '*.rb')].each do |bundle|
eval_gemfile(bundle)
Expand Down
14 changes: 14 additions & 0 deletions config/settings.d/registration.yml.example
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
---
:enabled: false
# :registration_url: https://loadbalancer.example.com

# Optional: shared Redis cache for the registration script, useful when
# multiple capsule nodes sit behind a load balancer. One warm request
# on any node benefits all nodes. Falls back to per-node in-memory cache
# if unset or if Redis is unreachable. Requires the 'redis' gem.
# :cache_url: redis://localhost:6379/0

# Optional: maximum number of concurrent POST /register requests forwarded to
# Foreman. When all permits are taken the capsule returns 503 + Retry-After:30
# immediately. The orchestration layer (Ansible retry_failed, wave batching)
# handles rescheduling — no in-capsule wait queue is used.
# Default: unset (unlimited). Size this relative to Foreman's Rails thread pool
# and database connection pool (typically 5-20 threads/connections).
# :max_concurrent_registrations: 50
9 changes: 9 additions & 0 deletions modules/registration/proxy_request.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@ def host_register(request)
send_request(proxy_req)
end

def foreman_reachable?
proxy_req = request_factory.create_get '/api/status', {}, {}
send_request(proxy_req)
true
rescue => e
logger.debug "Foreman reachability check failed: #{e.class}: #{e.message}"
false
end

private

def request_params(request)
Expand Down
184 changes: 178 additions & 6 deletions modules/registration/registration_api.rb
Original file line number Diff line number Diff line change
@@ -1,29 +1,201 @@
require 'registration/proxy_request'

class Proxy::Registration::Api < ::Sinatra::Base
# Cache for the global registration script (GET /register).
#
# The script is identical for all hosts sharing the same registration
# parameters (org, location, hostgroup, activation keys), making it safe
# to serve from an in-memory cache during concurrent bulk registration.
#
# Per-key double-checked locking prevents thundering herd while allowing
# genuinely independent cache keys (e.g. different activation keys) to
# fetch from Foreman in parallel.
#
# Only HTTP 200 responses are cached — errors are raised out of the cache
# block so they never poison the cache. The per-key mutex is evicted
# immediately after caching so KEY_MUTEXES stays bounded to keys currently
# being fetched for the first time (zero under steady state).
REGISTRATION_SCRIPT_CACHE_TTL = 5 * 60 # seconds
KEY_MUTEXES = Concurrent::Map.new
SCRIPT_CACHE = Concurrent::Map.new

class ScriptFetchError < StandardError
attr_reader :response

def initialize(response)
super()
@response = response
end
end

class << self
def registration_script_cache
SCRIPT_CACHE
end

def key_mutex(cache_key)
KEY_MUTEXES.compute_if_absent(cache_key) { Mutex.new }
end

def evict_key_mutex(cache_key)
KEY_MUTEXES.delete(cache_key)
end

# Returns a Redis client when :cache_url is configured, nil otherwise.
# Lazy-initialised; falls back to nil on LoadError (gem not installed)
# or connection error, so in-memory cache remains the fallback.
# Returns a Concurrent::Semaphore when :max_concurrent_registrations is set,
# nil otherwise (unlimited). Lazy-initialised; the semaphore persists for
# the lifetime of the process so the permit count is shared across requests.
def registration_semaphore
return @registration_semaphore if instance_variable_defined?(:@registration_semaphore)

limit = Proxy::Registration::Plugin.settings.max_concurrent_registrations
@registration_semaphore = limit ? Concurrent::Semaphore.new(limit.to_i) : nil
end

def registration_cache_client
return @registration_cache_client if instance_variable_defined?(:@registration_cache_client)

cache_url = Proxy::Registration::Plugin.settings.cache_url
@registration_cache_client = if cache_url
require 'redis'
Redis.new(url: cache_url)
end
rescue LoadError
@registration_cache_client = nil
rescue => e
::Proxy::Log.logger.warn "Registration: Redis init failed (#{e.class}: #{e.message}); using local cache"
@registration_cache_client = nil
end
end

get '/health' do
content_type :json
if Proxy::Registration::ProxyRequest.new.foreman_reachable?
{ status: 'ok' }.to_json
else
status 503
{ status: 'error', message: 'Foreman is unreachable' }.to_json
end
rescue StandardError => e
logger.exception 'Error during health check', e
status 503
content_type :json
{ status: 'error', message: 'Health check failed' }.to_json
end

get '/' do
response = Proxy::Registration::ProxyRequest.new.global_register(request)
handle_response(response)
registration_script
rescue ScriptFetchError => e
handle_response(e.response)
rescue StandardError => e
logger.exception "Error when rendering Global Registration Template", e
render_error(default_error_msg)
end

post '/' do
response = Proxy::Registration::ProxyRequest.new.host_register(request)
handle_response(response)
with_concurrency_limit do
resp = Proxy::Registration::ProxyRequest.new.host_register(request)
handle_response(resp)
end
rescue StandardError => e
logger.exception "Error when rendering Host Registration Template", e
render_error(default_error_msg)
end

private

# Runs the block only if a concurrency permit is available.
# Returns the block's value on success.
# Halts with 503 + Retry-After without yielding when all permits are taken.
#
# NOTE: do not call Sinatra's `halt` inside the block — `halt` raises
# Sinatra::HaltResponse which bypasses `ensure`, leaking a permit.
# Use `status` + explicit `return` instead.
def with_concurrency_limit
semaphore = self.class.registration_semaphore
if semaphore && !semaphore.try_acquire
logger.warn "registration_concurrency limit=#{semaphore.count} status=rejected"
response.headers['Retry-After'] = '30'
halt 503, "echo \"Registration queue full, please retry later\"\nexit 1\n"
end
begin
yield
ensure
semaphore&.release
end
end

def registration_script
cache_key = Rack::Utils.build_query(
Rack::Utils.parse_nested_query(request.query_string).sort_by { |k, _| k }
)
cache(cache_key) do
response = Proxy::Registration::ProxyRequest.new.global_register(request)
raise ScriptFetchError, response unless response.code == '200'
response.body
end
end

def cache(key, &block)
value = read_registration_cache(key)
return value if value

self.class.key_mutex(key).synchronize do
value = read_registration_cache(key)
return value if value

result = yield

# Write to Redis first (shared across all capsule nodes in the LB pool)
if (redis = self.class.registration_cache_client)
begin
redis.setex(key, REGISTRATION_SCRIPT_CACHE_TTL, result)
rescue => e
logger.warn "registration_script Redis write failed: #{e.message}"
end
end

# Always write to local in-memory cache (fallback and fast path)
self.class.registration_script_cache[key] = { body: result, at: Time.now }
self.class.evict_key_mutex(key)
result
end
end

def read_registration_cache(cache_key)
# Check Redis first — a hit here means another node already fetched the
# script, so we serve it without going to Foreman and also warm the
# local cache for subsequent requests to this node.
if (redis = self.class.registration_cache_client)
begin
cached = redis.get(cache_key)
if cached
logger.debug "registration_script cache=HIT source=shared key_prefix=#{cache_key[0, 40]}"
self.class.registration_script_cache[cache_key] = { body: cached, at: Time.now }
return cached
end
rescue => e
logger.warn "registration_script Redis read failed, falling back to local cache: #{e.message}"
end
end

# Fall back to per-node in-memory cache
entry = self.class.registration_script_cache[cache_key]
if entry && (Time.now - entry[:at]) < REGISTRATION_SCRIPT_CACHE_TTL
logger.debug "registration_script cache=HIT source=local age=#{(Time.now - entry[:at]).to_i}s key_prefix=#{cache_key[0, 40]}"
entry[:body]
else
logger.debug "registration_script cache=MISS key_prefix=#{cache_key[0, 40]}"
nil
end
end

def handle_response(response)
if response.code.start_with? '2'
if response.code.start_with?('2')
response.body
else
# Return error message only if it is not HTML.
message = response["content-type"].include?('text/plain') ? response.body : default_error_msg
render_error(message, code: response.code)
end
Expand Down
16 changes: 16 additions & 0 deletions modules/registration/registration_plugin.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,21 @@ class Plugin < ::Proxy::Plugin

validate :registration_url, optional_url: true
expose_setting :registration_url

# Optional Redis URL for sharing the registration script cache across
# multiple capsule nodes in an LB pool. When set, all nodes read from
# and write to the same Redis instance so a single warm request benefits
# every node. Falls back to per-node in-memory cache if unset or if
# Redis is unreachable. Requires the 'redis' gem to be installed.
# Example: redis://lb-host:6379/0
validate :cache_url, optional_url: true

# Optional limit on simultaneous POST /register requests forwarded to
# Foreman. When all permits are taken, the capsule immediately returns
# 503 with Retry-After: 30 so the client can back off. The orchestration
# layer (Ansible retry_failed, satperf wave batching) handles rescheduling.
# Default: unset (unlimited). Tune based on Foreman's Rails thread pool
# and database connection pool size.
# Example: 50 (permits ~50 concurrent host record creations)
end
end
Loading