diff --git a/Project.toml b/Project.toml index 549869d..dda2dff 100644 --- a/Project.toml +++ b/Project.toml @@ -8,6 +8,8 @@ Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" MbedTLS = "739be429-bea8-5141-9913-cc70e7f3736d" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" Sockets = "6462fe0b-24de-5631-8697-dd941f90decc" SodiumSeal = "2133526b-2bfb-4018-ac12-889fb3908a75" URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" @@ -17,6 +19,8 @@ HTTP = "1.10.17" # Must be >= 1.10.17, see https://github.com/JuliaWeb/GitHub.jl URIs = "1.6" # Must be >= 1.6, see https://github.com/JuliaWeb/GitHub.jl/pull/225 JSON = "0.19, 0.20, 0.21" MbedTLS = "0.6, 0.7, 1" +Random = "1" +SHA = "0.7.0, 1" SodiumSeal = "0.1" julia = "1.6" diff --git a/README.md b/README.md index a75cb1e..89f8ab1 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,13 @@ You can inspect which fields are available for a type `G<:GitHubType` by calling GitHub.jl implements a bunch of methods that make REST requests to GitHub's API. The below sections list these methods (note that a return type of `Tuple{Vector{T}, Dict}` means the result is [paginated](#pagination)). +These methods all accept keyword arguments which control how the request is made to github: + +- `max_retries::Int=5`: how many retries to attempt in requesting the resources. Retries are only made for idempotent requests ("GET", "HEAD", "OPTIONS", "TRACE", "PUT", "DELETE") and delays respect GitHub [rate limit headers](https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api?apiVersion=2022-11-28#checking-the-status-of-your-rate-limit). +- `verbose::Bool=true`: whether or not to log retries as Info level logs +- `max_sleep_seconds::Real=60*20`: if GitHub.jl intends to sleep for longer than `max_sleep_seconds` before retrying, e.g. due to rate limit headers from GitHub, throws an `RetryDelayException` instead. +- `respect_mutation_delay::Bool=true`: whether or not to ensure at least 1s has passed since the last mutation has been submitted to the GitHub API (tracked separately per auth object), following GitHub's [recommended practice](https://docs.github.com/en/rest/using-the-rest-api/best-practices-for-using-the-rest-api?apiVersion=2022-11-28#pause-between-mutative-requests). + #### Users and Organizations | method | return type | documentation | diff --git a/src/GitHub.jl b/src/GitHub.jl index dd6e24c..2dd600d 100644 --- a/src/GitHub.jl +++ b/src/GitHub.jl @@ -2,6 +2,8 @@ module GitHub using Dates using Base64 +using Random +using SHA: sha256 ########## # import # @@ -44,7 +46,7 @@ export # auth.jl authenticate export # requests.jl - rate_limit + rate_limit, RetryDelayException ################################## # Owners (organizations + users) # diff --git a/src/utils/auth.jl b/src/utils/auth.jl index 976b8fa..1eb0681 100644 --- a/src/utils/auth.jl +++ b/src/utils/auth.jl @@ -97,3 +97,28 @@ function Base.show(io::IO, a::OAuth2) token_str = a.token[1:6] * repeat("*", length(a.token) - 6) print(io, "GitHub.OAuth2($token_str)") end + +########### +# Hashing # +########### + +# These are used to implement the 1s mutation delay on a per-auth basis +# See also `wait_for_mutation_delay`. We want it to be difficult to +# invert the hash to recover the token, as the hash is stored in a global dict +# in this module. +let + # this would be better as OncePerProcess to get fresh salts per session, but we support old Julia's so we'll + # have one per precompilation. + salt = randstring(RandomDevice(), 20) + global uint64_hash(str::AbstractString) = first(reinterpret(UInt64, sha256(string(salt, str))[1:8])) +end + +get_auth_hash(auth::OAuth2) = uint64_hash(auth.token) + +get_auth_hash(auth::UsernamePassAuth) = uint64_hash(string(auth.username, "##", auth.password)) + +get_auth_hash(::AnonymousAuth) = UInt64(0) + +# note this will give different hashes for different JWTs from the same key, +# meaning in that case the 1s mutation delay will apply per-JWT instead of per-key +get_auth_hash(auth::JWTAuth) = uint64_hash(auth.JWT) diff --git a/src/utils/requests.jl b/src/utils/requests.jl index 02c33f5..97cd65f 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -14,6 +14,11 @@ end const DEFAULT_API = GitHubWebAPI(URIs.URI("https://api.github.com")) +const MUTATION_LOCK = ReentrantLock() +const NEXT_AVAILABLE = Dict{UInt64, Float64}() # Maps auth_hash -> next available mutation time +const LAST_CLEARED_TIMESTAMP = Ref{Float64}(0.0) +const CLEAR_OLDER_THAN_SECONDS = 10*60 # 10 minutes + using Base.Meta """ @@ -58,28 +63,261 @@ api_uri(api::GitHubAPI, path) = error("URI retrieval not implemented for this AP # GitHub REST Methods # ####################### -function github_request(api::GitHubAPI, request_method, endpoint; +function safe_tryparse(args...) + try + return tryparse(args...) + catch + nothing + end +end + +""" + github_retry_decision(method::String, resp::Union{HTTP.Response, Nothing}, ex::Union{Exception, Nothing}, exponential_delay::Float64; verbose::Bool=true) -> (should_retry::Bool, sleep_seconds::Float64) + +Analyzes a GitHub API response/exception to determine if a request should be retried and how long to wait. +Uses HTTP.jl's retry logic as a foundation, then adds GitHub-specific rate limiting handling. +This function does NOT perform any sleeping - it only returns the decision and timing information. +Logs retry decisions with detailed rate limit information when retries occur (if verbose=true). + +# Arguments +- `method`: HTTP method string (e.g., "GET", "POST") +- `resp`: HTTP response object (if a response was received), or `nothing` +- `ex`: Exception that occurred (if any), or `nothing` +- `exponential_delay`: The delay from ExponentialBackOff iterator +- `verbose`: Whether to log retry decisions (default: true) + +# Returns +A tuple `(should_retry, sleep_seconds)` where: +- `should_retry`: `true` if the request should be retried, `false` otherwise +- `sleep_seconds`: Number of seconds to sleep before retry (0 if no sleep needed) + +# Retry Logic +1. First uses HTTP.jl's standard retry logic (`isrecoverable` + `isidempotent`) +2. Then adds GitHub-specific rate limiting: + - **Primary rate limit**: `x-ratelimit-remaining: 0` → wait until `x-ratelimit-reset` time + - **Secondary rate limit**: Has `retry-after` header OR error message indicates secondary → + - If `retry-after` present: use that delay + - If `x-ratelimit-remaining: 0`: wait until reset time + - Otherwise: wait at least 1 minute, then use exponential backoff + +This follows the [documentation from GitHub](https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api?apiVersion=2022-11-28#exceeding-the-rate-limit) as of 2025. +""" +function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothing}, ex::Union{Exception, Nothing}, exponential_delay::Float64; verbose::Bool=true) + # If we have a response, process it first (takes precedence over exceptions) + if resp !== nothing + status = resp.status + + # Don't retry successful responses + if status < 400 + return (false, 0.0) + end + else + # No response - check if we have a recoverable exception + if ex !== nothing + # If there's an exception, check if it's recoverable and if the method is idempotent + if HTTP.RetryRequest.isrecoverable(ex) && HTTP.RetryRequest.isidempotent(method) + verbose && @info "GitHub API exception, retrying in $(round(exponential_delay, digits=1))s" method=method exception=ex + return (true, exponential_delay) + end + end + # No response and no retryable exception + return (false, 0.0) + end + + # At this point we have a response with status >= 400 + # First let us see if we want to retry it. + + # Note: `String` takes ownership / removes the body, so we make a copy + body = String(copy(resp.body)) + is_primary_rate_limit = occursin("primary rate limit", lowercase(body)) && status in (403, 429) + is_secondary_rate_limit = occursin("secondary rate limit", lowercase(body)) && status in (403, 429) + + # `other_retry` is `HTTP.RetryRequest.retryable(status)` minus 403, + # since if it's not a rate-limit, we don't want to retry 403s. + other_retry = status in (408, 409, 429, 500, 502, 503, 504, 599) + + do_retry = HTTP.RetryRequest.isidempotent(method) && (is_primary_rate_limit || is_secondary_rate_limit || other_retry) + + if !do_retry + return (false, 0.0) + end + + # Now we know we want to retry. We need to decide how long to wait. + + # Get all rate limit headers + limit = HTTP.header(resp, "x-ratelimit-limit", "") + remaining = HTTP.header(resp, "x-ratelimit-remaining", "") + used = HTTP.header(resp, "x-ratelimit-used", "") + reset_time = HTTP.header(resp, "x-ratelimit-reset", "") + resource = HTTP.header(resp, "x-ratelimit-resource", "") + retry_after = HTTP.header(resp, "retry-after", "") + + msg = if is_primary_rate_limit + "GitHub API primary rate limit reached" + elseif is_secondary_rate_limit + "GitHub API secondary rate limit reached" + else + "GitHub API returned $status" + end + + # If retry-after header is present, respect it + delay_seconds = safe_tryparse(Float64, retry_after) + if delay_seconds !== nothing + delay_seconds = parse(Float64, retry_after) + verbose && @info "$msg, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after + return (true, delay_seconds) + end + + # If x-ratelimit-remaining is 0, wait until reset time + reset_timestamp = safe_tryparse(Float64, reset_time) + if remaining == "0" && reset_timestamp !== nothing + current_time = time() + if reset_timestamp > current_time + delay_seconds = reset_timestamp - current_time + 1.0 + verbose && @info "$msg, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after + return (true, delay_seconds) + end + end + + # If secondary rate limit hit without headers to guide us, + # make sure we wait at least a minute + delay_seconds = is_secondary_rate_limit ? max(60.0, exponential_delay) : exponential_delay + + # Fall back to exponential backoff + verbose && @info "$msg, retrying in $(round(delay_seconds, digits=1))s" method=method status=status + + return (true, delay_seconds) +end + +function wait_for_mutation_delay(auth_hash; sleep_fn=sleep, time_fn=time) + while true + now = time_fn() + local wait_time + # Checking & setting must be atomic to prevent races, so we use a lock + @lock MUTATION_LOCK begin + # we clear entries of `NEXT_AVAILABLE` older than `CLEAR_OLDER_THAN_SECONDS` + # so the global dict doesn't grow forever. But since this could be slow, and we're holding the lock, + # we first check `LAST_CLEARED_TIMESTAMP` to see if we've cleared it recently, and skip it if we have + last_cleared = LAST_CLEARED_TIMESTAMP[] + cutoff_time = now - CLEAR_OLDER_THAN_SECONDS + if cutoff_time >= last_cleared + filter!(kv -> kv[2] >= cutoff_time, NEXT_AVAILABLE) + LAST_CLEARED_TIMESTAMP[] = now + end + next_available_time = get(NEXT_AVAILABLE, auth_hash, 0.0) + wait_time = max(0.0, next_available_time - now) + if wait_time <= 0 # good to go + # Reserve the next available slot: now + 1 second + NEXT_AVAILABLE[auth_hash] = now + 1.0 + return nothing + end + end + sleep_fn(wait_time) + end +end + +struct RetryDelayException <: Exception + msg::String +end +Base.showerror(io::IO, e::RetryDelayException) = print(io, e.msg) + +""" + with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep, max_sleep_seconds::Real = 20*60) + +Generic retry wrapper that executes function `f()` with GitHub-specific retry logic. + +# Arguments +- `f`: Function to execute (should return HTTP.Response or throw exception) +- `method`: HTTP method for retry decision logic (default: "GET") +- `max_retries`: Maximum number of retry attempts (default: 5) +- `verbose`: Whether to log retry decisions (default: true) +- `sleep_fn`: Function to call for sleeping between retries (default: sleep). For testing, can be replaced with a custom function. +- `max_sleep_seconds::Real`: maximum number of seconds to sleep when delaying before retrying. If the intended retry delay exceeds `max_sleep_seconds` an exception is thrown instead. This parameter defaults to 20*60 (20 minutes). + +# Returns +Returns the result of `f()` if successful, or re-throws the final exception if all retries fail. + +# Example +```julia +result = with_retries(method="GET", verbose=false) do + HTTP.get("https://api.github.com/user", headers) +end +``` +""" +function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep, max_sleep_seconds::Real = 60*20, respect_mutation_delay=true, auth_hash) + backoff = Base.ExponentialBackOff(n = max_retries+1) + method_upper = uppercase(method) + requires_mutation_throttle = respect_mutation_delay && method_upper in ("POST", "PATCH", "PUT", "DELETE") + for (attempt, exponential_delay) in enumerate(backoff) + last_try = attempt > max_retries + if requires_mutation_throttle + wait_for_mutation_delay(auth_hash; sleep_fn) + end + local r, ex + try + r = f() + ex = nothing + if last_try + return r + end + catch e + r = nothing + ex = e + if last_try + rethrow() + end + end + + # Check if we should retry based on this attempt + should_retry, sleep_seconds = github_retry_decision(method, r, ex, exponential_delay; verbose) + + if !should_retry + if ex !== nothing + throw(ex) + else + return r + end + end + if sleep_seconds > max_sleep_seconds + throw(RetryDelayException("Retry delay $(sleep_seconds) exceeds configured maximum ($(max_sleep_seconds) seconds)")) + end + if sleep_seconds > 0 + sleep_fn(sleep_seconds) + end + end +end + +function github_request(api::GitHubAPI, request_method::String, endpoint; auth = AnonymousAuth(), handle_error = true, - headers = Dict(), params = Dict(), allowredirects = true) + headers = Dict(), params = Dict(), allowredirects = true, + max_retries = 5, verbose = true, max_sleep_seconds = 20*60, respect_mutation_delay = true) authenticate_headers!(headers, auth) params = github2json(params) api_endpoint = api_uri(api, endpoint) _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") - if request_method == HTTP.get - r = request_method(URIs.URI(api_endpoint, query = params), _headers, redirect = allowredirects, status_exception = false, idle_timeout=20) - else - r = request_method(string(api_endpoint), _headers, JSON.json(params), redirect = allowredirects, status_exception = false, idle_timeout=20) + auth_hash = get_auth_hash(auth) + r = with_retries(; method = request_method, max_retries, verbose, max_sleep_seconds, respect_mutation_delay, auth_hash) do + if request_method == "GET" + return HTTP.request(request_method, URIs.URI(api_endpoint, query = params), _headers; + redirect = allowredirects, status_exception = false, + idle_timeout = 20, retry = false) + else + return HTTP.request(request_method, string(api_endpoint), _headers, JSON.json(params); + redirect = allowredirects, status_exception = false, + idle_timeout = 20, retry = false) + end end + handle_error && handle_response_error(r) return r end -gh_get(api::GitHubAPI, endpoint = ""; options...) = github_request(api, HTTP.get, endpoint; options...) -gh_post(api::GitHubAPI, endpoint = ""; options...) = github_request(api, HTTP.post, endpoint; options...) -gh_put(api::GitHubAPI, endpoint = ""; options...) = github_request(api, HTTP.put, endpoint; options...) -gh_delete(api::GitHubAPI, endpoint = ""; options...) = github_request(api, HTTP.delete, endpoint; options...) -gh_patch(api::GitHubAPI, endpoint = ""; options...) = github_request(api, HTTP.patch, endpoint; options...) +gh_get(api::GitHubAPI, endpoint = ""; options...) = github_request(api, "GET", endpoint; options...) +gh_post(api::GitHubAPI, endpoint = ""; options...) = github_request(api, "POST", endpoint; options...) +gh_put(api::GitHubAPI, endpoint = ""; options...) = github_request(api, "PUT", endpoint; options...) +gh_delete(api::GitHubAPI, endpoint = ""; options...) = github_request(api, "DELETE", endpoint; options...) +gh_patch(api::GitHubAPI, endpoint = ""; options...) = github_request(api, "PATCH", endpoint; options...) gh_get_json(api::GitHubAPI, endpoint = ""; options...) = JSON.parse(HTTP.payload(gh_get(api, endpoint; options...), String)) gh_post_json(api::GitHubAPI, endpoint = ""; options...) = JSON.parse(HTTP.payload(gh_post(api, endpoint; options...), String)) @@ -113,15 +351,24 @@ end extract_page_url(link) = match(r"<.*?>", link).match[2:end-1] function github_paged_get(api, endpoint; page_limit = Inf, start_page = "", handle_error = true, - auth = AnonymousAuth(), headers = Dict(), params = Dict(), options...) + auth = AnonymousAuth(), headers = Dict(), params = Dict(), max_retries = 5, verbose = true, max_sleep_seconds = 20*60, respect_mutation_delay = true, options...) authenticate_headers!(headers, auth) _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") + + auth_hash = get_auth_hash(auth) + # Helper function to make a get request with retries + function make_request_with_retries(url, headers) + return with_retries(; method = "GET", max_retries, verbose, max_sleep_seconds, respect_mutation_delay, auth_hash) do + HTTP.request("GET", url, headers; status_exception = false, retry = false) + end + end + if isempty(start_page) - r = gh_get(api, endpoint; handle_error = handle_error, headers = _headers, params = params, auth=auth, options...) + r = gh_get(api, endpoint; handle_error, headers = _headers, params, auth, max_retries, verbose, max_sleep_seconds, respect_mutation_delay, options...) else @assert isempty(params) "`start_page` kwarg is incompatible with `params` kwarg" - r = HTTP.get(start_page, headers = _headers) + r = make_request_with_retries(start_page, _headers) end results = HTTP.Response[r] page_data = Dict{String, String}() @@ -131,7 +378,7 @@ function github_paged_get(api, endpoint; page_limit = Inf, start_page = "", hand links = get_page_links(r) next_index = find_page_link(links, "next") next_index == 0 && break - r = HTTP.get(extract_page_url(links[next_index]), headers = _headers) + r = make_request_with_retries(extract_page_url(links[next_index]), _headers) handle_error && handle_response_error(r) push!(results, r) page_count += 1 @@ -184,7 +431,7 @@ function handle_response_error(r::HTTP.Response) errors = get(data, "errors", "") catch end - error("Error found in GitHub reponse:\n", + error("Error found in GitHub response:\n", "\tStatus Code: $(r.status)\n", ((isempty(message) && isempty(errors)) ? ("\tBody: $body",) : @@ -210,4 +457,4 @@ function check_disallowed_name_pattern(str::AbstractString) end return str -end \ No newline at end of file +end diff --git a/test/auth_tests.jl b/test/auth_tests.jl index e6d07cc..107101c 100644 --- a/test/auth_tests.jl +++ b/test/auth_tests.jl @@ -24,3 +24,86 @@ auth2 = GitHub.JWTAuth(1234, key; iat = DateTime("2016-9-15T14:00")) @test_throws ArgumentError GitHub.OAuth2("ghp_\n") @test_throws ArgumentError GitHub.JWTAuth("ghp_\n") + +@testset "uint64_hash tests" begin + # Test that uint64_hash produces UInt64 values + @test typeof(GitHub.uint64_hash("test")) == UInt64 + + # Test that same input produces same output (deterministic) + @test GitHub.uint64_hash("hello") == GitHub.uint64_hash("hello") + + # Test that different inputs produce different outputs + @test GitHub.uint64_hash("hello") != GitHub.uint64_hash("world") + + # Test with empty string + @test typeof(GitHub.uint64_hash("")) == UInt64 + + # Test with special characters + @test typeof(GitHub.uint64_hash("!@#\$%^&*()")) == UInt64 + @test GitHub.uint64_hash("test!") != GitHub.uint64_hash("test") +end + +@testset "get_auth_hash tests" begin + @testset "OAuth2 hashing" begin + # Same token should produce same hash + auth1 = GitHub.OAuth2("ghp_testtoken123456") + auth2 = GitHub.OAuth2("ghp_testtoken123456") + @test GitHub.get_auth_hash(auth1) == GitHub.get_auth_hash(auth2) + @test typeof(GitHub.get_auth_hash(auth1)) == UInt64 + + # Different tokens should produce different hashes + auth3 = GitHub.OAuth2("ghp_differenttoken789") + @test GitHub.get_auth_hash(auth1) != GitHub.get_auth_hash(auth3) + end + + @testset "UsernamePassAuth hashing" begin + # Same credentials should produce same hash + auth1 = GitHub.UsernamePassAuth("user1", "pass1") + auth2 = GitHub.UsernamePassAuth("user1", "pass1") + @test GitHub.get_auth_hash(auth1) == GitHub.get_auth_hash(auth2) + @test typeof(GitHub.get_auth_hash(auth1)) == UInt64 + + # Different username should produce different hash + auth3 = GitHub.UsernamePassAuth("user2", "pass1") + @test GitHub.get_auth_hash(auth1) != GitHub.get_auth_hash(auth3) + + # Different password should produce different hash + auth4 = GitHub.UsernamePassAuth("user1", "pass2") + @test GitHub.get_auth_hash(auth1) != GitHub.get_auth_hash(auth4) + end + + @testset "AnonymousAuth hashing" begin + # AnonymousAuth should always return UInt64(0) + auth1 = GitHub.AnonymousAuth() + auth2 = GitHub.AnonymousAuth() + @test GitHub.get_auth_hash(auth1) == UInt64(0) + @test GitHub.get_auth_hash(auth2) == UInt64(0) + @test GitHub.get_auth_hash(auth1) == GitHub.get_auth_hash(auth2) + end + + @testset "JWTAuth hashing" begin + # Same JWT should produce same hash + auth1 = GitHub.JWTAuth(1234, joinpath(dirname(@__FILE__), "not_a_real_key.pem"); + iat = DateTime("2016-9-15T14:00")) + auth2 = GitHub.JWTAuth(1234, joinpath(dirname(@__FILE__), "not_a_real_key.pem"); + iat = DateTime("2016-9-15T14:00")) + @test GitHub.get_auth_hash(auth1) == GitHub.get_auth_hash(auth2) + @test typeof(GitHub.get_auth_hash(auth1)) == UInt64 + + # Different iat should produce different hash (different JWT) + auth3 = GitHub.JWTAuth(1234, joinpath(dirname(@__FILE__), "not_a_real_key.pem"); + iat = DateTime("2016-9-15T14:01")) + @test GitHub.get_auth_hash(auth1) != GitHub.get_auth_hash(auth3) + end + + @testset "Different auth types produce different hashes" begin + oauth = GitHub.OAuth2("ghp_samestring") + anon = GitHub.AnonymousAuth() + userpass = GitHub.UsernamePassAuth("user", "pass") + + # All should be different from each other + @test GitHub.get_auth_hash(oauth) != GitHub.get_auth_hash(anon) + @test GitHub.get_auth_hash(oauth) != GitHub.get_auth_hash(userpass) + @test GitHub.get_auth_hash(anon) != GitHub.get_auth_hash(userpass) + end +end diff --git a/test/retries.jl b/test/retries.jl new file mode 100644 index 0000000..1639804 --- /dev/null +++ b/test/retries.jl @@ -0,0 +1,595 @@ +using Test +using HTTP +using GitHub + +primary_rate_limit_body = Vector{UInt8}("primary rate limit") +secondary_rate_limit_body = Vector{UInt8}("secondary rate limit") + +@testset "github_retry_decision" begin + + @testset "HTTP.jl recoverable exceptions" begin + # Test with a potentially recoverable exception (let HTTP.jl decide) + # We'll just test that our function handles exceptions without crashing + network_ex = Base.IOError("connection reset", 104) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET", nothing, network_ex, 2.0; verbose=false) + # The actual retry decision depends on HTTP.jl's isrecoverable and isidempotent functions + @test typeof(should_retry) == Bool + @test sleep_seconds >= 0.0 + + # Test with non-recoverable exception + non_recoverable_ex = ArgumentError("invalid argument") + should_retry, sleep_seconds = GitHub.github_retry_decision("GET", nothing, non_recoverable_ex, 2.0; verbose=false) + @test should_retry == false + @test sleep_seconds == 0.0 + end + + @testset "No response and no exception" begin + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",nothing, nothing, 2.0; verbose=false) + @test should_retry == false + @test sleep_seconds == 0.0 + end + + @testset "Successful responses" begin + for status in [200, 201, 204] + resp = HTTP.Response(status) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 2.0; verbose=false) + @test should_retry == false + @test sleep_seconds == 0.0 + end + end + + @testset "Primary rate limit - x-ratelimit-remaining = 0" begin + + # Test with future reset time - use fixed timestamp to avoid race conditions + future_time = "1900000000" # Fixed timestamp in the future (year 2030) + resp = HTTP.Response(403, [ + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => future_time + ], primary_rate_limit_body) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET", resp, nothing, 2.0; verbose=false) + @test should_retry == true + @test sleep_seconds > 100000 # Should be a large delay since reset time is far in future + + # Test with past reset time (should use exponential backoff) + past_time = "1000000000" # Fixed timestamp in the past (year 2001) + resp2 = HTTP.Response(403, [ + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => past_time + ], primary_rate_limit_body) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET", resp2, nothing, 5.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 5.0 # Should use the exponential delay + end + + @testset "Secondary rate limit - retry-after header" begin + + # Test secondary rate limit with retry-after + resp = HTTP.Response(429, ["retry-after" => "30"]; body = secondary_rate_limit_body) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 2.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 30.0 # Should use retry-after value + + # Test with just retry-after header (no body message) + resp2 = HTTP.Response(429, ["retry-after" => "15"]) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp2, nothing, 2.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 15.0 + end + + @testset "Secondary rate limit - no headers" begin + resp = HTTP.Response(429; body = secondary_rate_limit_body) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 2.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 60.0 # Should wait at least 1 minute + + # Test with exponential delay greater than 60 seconds + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 120.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 120.0 # Should use the larger exponential delay + end + + @testset "Secondary rate limit - x-ratelimit-remaining = 0" begin + + # Test secondary rate limit with reset time - use fixed timestamp to avoid race conditions + future_time = "1900000000" # Fixed timestamp in the future (year 2030) + resp = HTTP.Response(403, [ + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => future_time + ], secondary_rate_limit_body) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 5.0; verbose=false) + @test should_retry == true + @test sleep_seconds > 100000 # Should be a large delay since reset time is far in future + end + + @testset "429 - exponential backoff" begin + # 429 without specific headers or body + resp = HTTP.Response(429, []) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 4.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 4.0 # Should use exponential delay + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 8.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 8.0 + end + + @testset "Other HTTP errors" begin + for status in [408, 409, 500, 502, 503, 504, 599] + resp = HTTP.Response(status, []) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 3.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 3.0 # Should use exponential delay + end + end + + @testset "Non-retryable client errors" begin + for status in [400, 401, 403, 404, 422] + resp = HTTP.Response(status, []) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 1.0; verbose=false) + @test should_retry == false + @test sleep_seconds == 0.0 + end + end + + @testset "Invalid header values" begin + # Test with invalid retry-after header (should use secondary rate limit minimum) + resp1 = HTTP.Response(429, ["retry-after" => "invalid"], secondary_rate_limit_body) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp1, nothing, 2.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 60.0 # Falls back to secondary rate limit minimum (1 minute) + + # Test with invalid reset time (should fall back to secondary min) + resp2 = HTTP.Response(403, [ + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => "invalid" + ], secondary_rate_limit_body) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp2, nothing, 3.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 60.0 # minimum for secondary rate limit + end + + @testset "Rate limit header precedence" begin + # retry-after should take precedence over x-ratelimit-reset + future_time = "1900000000" # Fixed timestamp (doesn't matter since retry-after takes precedence) + resp = HTTP.Response(429, [ + "retry-after" => "5", + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => future_time + ]) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 10.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 5.0 # Should use retry-after, not reset time + end + + @testset "Rate limit remaining non-zero" begin + + # Should use exponential backoff when x-ratelimit-remaining is not "0" + future_time = "1900000000" # Fixed timestamp (doesn't matter since remaining != "0") + resp = HTTP.Response(403, [ + "x-ratelimit-remaining" => "5", + "x-ratelimit-reset" => future_time + ], primary_rate_limit_body) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 3.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 3.0 # Should use exponential backoff, not reset time + end + + @testset "Exception with successful response" begin + + # If we have both an exception and a response, the response should take precedence + resp = HTTP.Response(200) + ex = Base.IOError("some error", 0) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, ex, 2.0; verbose=false) + @test should_retry == false # Success response should not retry + @test sleep_seconds == 0.0 + end + + @testset "Request method considerations" begin + # Test with different HTTP methods to ensure retryable logic works + + # Network exception with different methods + network_ex = Base.IOError("connection refused", 111) + + # Test that both work without crashing (actual retry depends on HTTP.jl internals) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET", nothing, network_ex, 1.0; verbose=false) + @test typeof(should_retry) == Bool + @test sleep_seconds >= 0.0 + + # POST behavior depends on HTTP.jl's isidempotent() function (non-idempotent methods typically don't retry) + should_retry, sleep_seconds = GitHub.github_retry_decision("POST", nothing, network_ex, 1.0; verbose=false) + @test typeof(should_retry) == Bool + @test sleep_seconds >= 0.0 + end +end + + +@testset "with_retries function tests" begin + @testset "Success after retries" begin + call_count = Ref(0) + sleep_calls = Float64[] + + # Custom sleep function that records sleep durations + function test_sleep(seconds) + push!(sleep_calls, seconds) + end + + result = GitHub.with_retries(method="GET", max_retries=2, verbose=false, sleep_fn=test_sleep, auth_hash=UInt64(0)) do + call_count[] += 1 + if call_count[] < 3 + # Return a rate limit response for first 2 attempts + return HTTP.Response(429, ["retry-after" => "1"]) + else + # Success on 3rd attempt + return HTTP.Response(200) + end + end + + @test result.status == 200 + @test call_count[] ==3 + @test length(sleep_calls) == 2 # Should have slept twice + @test all(s -> s >= 1.0, sleep_calls) # Should respect retry-after + end + + @testset "Exception handling" begin + call_count = Ref(0) + sleep_calls = Float64[] + + function test_sleep(seconds) + push!(sleep_calls, seconds) + end + + # Test with recoverable exception (for GET method) + result = GitHub.with_retries(method="GET", max_retries=2, verbose=false, sleep_fn=test_sleep, auth_hash=UInt64(0)) do + call_count[] += 1 + if call_count[] < 3 + throw(Base.IOError("connection refused", 111)) + else + return HTTP.Response(200) + end + end + + @test result.status == 200 + @test call_count[] ==3 + @test length(sleep_calls) == 2 + end + + @testset "Non-retryable exceptions" begin + # Test that ArgumentError is not retried + @test_throws ArgumentError GitHub.with_retries(method="GET", verbose=false, auth_hash=UInt64(0)) do + throw(ArgumentError("invalid argument")) + end + end + + @testset "Max retries exhausted" begin + call_count = Ref(0) + sleep_calls = Float64[] + + function test_sleep(seconds) + push!(sleep_calls, seconds) + end + + # Test exhausting retries with rate limit responses + result = GitHub.with_retries(method="GET", max_retries=2, verbose=false, sleep_fn=test_sleep, auth_hash=UInt64(0)) do + call_count[] += 1 + return HTTP.Response(429, ["retry-after" => "0.1"]) # Always return rate limit + end + + @test result.status == 429 # Should return the final failed response + @test call_count[] ==3 # Initial + 2 retries + @test length(sleep_calls) == 2 + end + + @testset "Max retries exhausted with exception" begin + call_count = Ref(0) + + # Test exhausting retries with exceptions + @test_throws Base.IOError GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=x->nothing, auth_hash=UInt64(0)) do + call_count[] += 1 + throw(Base.IOError("persistent error", 104)) + end + + @test call_count[] ==2 # Initial + 1 retry + end + + @testset "Non-idempotent methods" begin + call_count = Ref(0) + + # POST requests should not retry on exceptions (non-idempotent) + @test_throws Base.IOError GitHub.with_retries(method="POST", verbose=false, auth_hash=UInt64(0)) do + call_count[] += 1 + throw(Base.IOError("connection refused", 111)) + end + + @test call_count[] ==1 # Should not retry + end + + @testset "GitHub rate limit handling" begin + call_count = Ref(0) + sleep_calls = Float64[] + + function test_sleep(seconds) + push!(sleep_calls, seconds) + end + + # Test primary rate limit with reset time + current_time = time() + reset_time = string(Int(round(current_time)) + 500000000) # 500000000 seconds from now + + result = GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=test_sleep, max_sleep_seconds=2*500000000, auth_hash=UInt64(0)) do + call_count[] += 1 + if call_count[] == 1 + return HTTP.Response(403, [ + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => reset_time + ], primary_rate_limit_body) + else + return HTTP.Response(200) + end + end + + @test result.status == 200 + @test call_count[] ==2 + @test length(sleep_calls) == 1 + @test sleep_calls[1] >= 500000000 # Should wait at least until reset time + + + @test_throws RetryDelayException GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=test_sleep, auth_hash=UInt64(0)) do + return HTTP.Response(403, [ + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => reset_time + ], primary_rate_limit_body) + end + end + + @testset "Secondary rate limit with retry-after" begin + call_count = Ref(0) + sleep_calls = Float64[] + + function test_sleep(seconds) + push!(sleep_calls, seconds) + end + + body = """{"message": "You have exceeded a secondary rate limit."}""" + + result = GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=test_sleep, auth_hash=UInt64(0)) do + call_count[] += 1 + if call_count[] == 1 + return HTTP.Response(429, ["retry-after" => "3"]; body = Vector{UInt8}(body)) + else + return HTTP.Response(200) + end + end + + @test result.status == 200 + @test call_count[] == 2 + @test length(sleep_calls) == 1 + @test sleep_calls[1] == 3.0 # Should respect retry-after exactly + end + + @testset "Non-retryable HTTP errors" begin + call_count = Ref(0) + + # 404 should not be retried + result = GitHub.with_retries(method="GET", verbose=false, auth_hash=UInt64(0)) do + call_count[] += 1 + return HTTP.Response(404) + end + + @test result.status == 404 + @test call_count[] ==1 # Should not retry + end + + @testset "Zero max_retries" begin + call_count = Ref(0) + + # With max_retries=0, should only try once + result = GitHub.with_retries(method="GET", max_retries=0, verbose=false, auth_hash=UInt64(0)) do + call_count[] += 1 + return HTTP.Response(429) # Rate limit response + end + + @test result.status == 429 + @test call_count[] ==1 # Should not retry at all + end + + @testset "Sleep function not called on success" begin + sleep_called = Ref(false) + + function test_sleep(seconds) + sleep_called[] = true + end + + result = GitHub.with_retries(method="GET", verbose=false, sleep_fn=test_sleep, auth_hash=UInt64(0)) do + return HTTP.Response(200) + end + + @test result.status == 200 + @test !sleep_called[] + end + +end + +@testset "wait_for_mutation_delay" begin + # Mock time and sleep to avoid real delays + times = [1.0, 3.0, 4.0, 5.0] + time_calls = Ref(0) + sleep_calls = Float64[] + + mock_time() = (time_calls[] += 1; times[time_calls[]]) + mock_sleep(t) = push!(sleep_calls, t) + + # Use a test auth hash + test_auth_hash = UInt64(12345) + + # Reset state for clean testing + @lock GitHub.MUTATION_LOCK begin + empty!(GitHub.NEXT_AVAILABLE) + end + + # First call should not wait (no reservation exists) + GitHub.wait_for_mutation_delay(test_auth_hash; sleep_fn=mock_sleep, time_fn=mock_time) + @test length(sleep_calls) == 0 + @test time_calls[] == 1 + + # Second call at t=3.0, next_available=2.0, so no wait needed + GitHub.wait_for_mutation_delay(test_auth_hash; sleep_fn=mock_sleep, time_fn=mock_time) + @test length(sleep_calls) == 0 # No sleep needed since enough time passed + + # Reset for third test - force a wait scenario + @lock GitHub.MUTATION_LOCK begin + GitHub.NEXT_AVAILABLE[test_auth_hash] = 4.5 # Next available at 4.5 + end + time_calls[] = 2 # Start at time 4.0 + empty!(sleep_calls) + + GitHub.wait_for_mutation_delay(test_auth_hash; sleep_fn=mock_sleep, time_fn=mock_time) + @test length(sleep_calls) == 1 + @test sleep_calls[1] ≈ 0.5 # Should wait 4.5 - 4.0 = 0.5 seconds +end + +@testset "with_retries mutation delay integration" begin + sleep_calls = Float64[] + mock_sleep(t) = push!(sleep_calls, t) + + # Use test auth hashes + test_auth_hash = UInt64(12345) + + # Reset mutation state + @lock GitHub.MUTATION_LOCK begin + empty!(GitHub.NEXT_AVAILABLE) + end + + # Test: GET method should not trigger mutation delay + empty!(sleep_calls) + result = GitHub.with_retries(method="GET", max_retries=0, verbose=false, sleep_fn=mock_sleep, auth_hash=test_auth_hash) do + HTTP.Response(200) + end + @test result.status == 200 + @test length(sleep_calls) == 0 # No mutation delay for GET + + # Test: POST method should trigger mutation delay (but first call won't wait) + empty!(sleep_calls) + result = GitHub.with_retries(method="POST", max_retries=0, verbose=false, sleep_fn=mock_sleep, auth_hash=test_auth_hash) do + HTTP.Response(200) + end + @test result.status == 200 + @test length(sleep_calls) == 0 # First POST doesn't wait + + # Test: respect_mutation_delay=false should bypass delay + empty!(sleep_calls) + result = GitHub.with_retries(method="POST", max_retries=0, verbose=false, sleep_fn=mock_sleep, respect_mutation_delay=false, auth_hash=test_auth_hash) do + HTTP.Response(200) + end + @test result.status == 200 + @test length(sleep_calls) == 0 # Delay bypassed +end + +@testset "Per-auth mutation delay isolation" begin + # Test that different auth hashes have independent mutation delays + sleep_calls = Float64[] + mock_sleep(t) = push!(sleep_calls, t) + + # Create two different auth hashes + auth_hash_1 = UInt64(111) + auth_hash_2 = UInt64(222) + + # Reset mutation state + @lock GitHub.MUTATION_LOCK begin + empty!(GitHub.NEXT_AVAILABLE) + end + + # First POST with auth_hash_1 should not wait + result1 = GitHub.with_retries(method="POST", max_retries=0, verbose=false, sleep_fn=mock_sleep, auth_hash=auth_hash_1) do + HTTP.Response(200) + end + @test result1.status == 200 + @test length(sleep_calls) == 0 + + # Immediately following POST with auth_hash_2 should also not wait (different auth) + result2 = GitHub.with_retries(method="POST", max_retries=0, verbose=false, sleep_fn=mock_sleep, auth_hash=auth_hash_2) do + HTTP.Response(200) + end + @test result2.status == 200 + @test length(sleep_calls) == 0 # Still no sleep - different auth + + # Verify both auth hashes have separate reservations + @lock GitHub.MUTATION_LOCK begin + @test haskey(GitHub.NEXT_AVAILABLE, auth_hash_1) + @test haskey(GitHub.NEXT_AVAILABLE, auth_hash_2) + @test GitHub.NEXT_AVAILABLE[auth_hash_1] != GitHub.NEXT_AVAILABLE[auth_hash_2] + end +end + +@testset "Reservation system prevents needless wakeups" begin + # Demonstrate how 3 threads arriving nearly simultaneously get properly serialized + # Thread 1 at t=10.0: reserves until t=11.0 + # Thread 2 at t=10.1: sees reservation at t=11.0, waits 0.9s, reserves until t=12.0 + # Thread 3 at t=10.2: sees reservation at t=12.0, waits 1.8s, reserves until t=13.0 + # Without reservations, threads 2 and 3 would compute similar short waits based on last_mutation=10.0, + # so one of them would need to wakeup, compute a new wait, and sleep again. + + test_hash = UInt64(999) + + @lock GitHub.MUTATION_LOCK begin + empty!(GitHub.NEXT_AVAILABLE) + end + + # Thread 1 at t=10.0 + times1 = [10.0] + time_calls1 = Ref(0) + GitHub.wait_for_mutation_delay(test_hash; sleep_fn=t->nothing, time_fn=()->(time_calls1[]+=1; times1[time_calls1[]])) + @test GitHub.NEXT_AVAILABLE[test_hash] == 11.0 + + # Thread 2 at t=10.1 + sleep_calls2 = Float64[] + times2 = [10.1, 11.0] + time_calls2 = Ref(0) + GitHub.wait_for_mutation_delay(test_hash; sleep_fn=t->push!(sleep_calls2,t), time_fn=()->(time_calls2[]+=1; times2[time_calls2[]])) + @test sleep_calls2[1] ≈ 0.9 # Waits until t=11.0 + @test GitHub.NEXT_AVAILABLE[test_hash] == 12.0 + + # Thread 3 at t=10.2 + sleep_calls3 = Float64[] + times3 = [10.2, 12.0] + time_calls3 = Ref(0) + GitHub.wait_for_mutation_delay(test_hash; sleep_fn=t->push!(sleep_calls3,t), time_fn=()->(time_calls3[]+=1; times3[time_calls3[]])) + @test sleep_calls3[1] ≈ 1.8 # Waits until t=12.0 (NOT just 0.9s from last_mutation) + @test GitHub.NEXT_AVAILABLE[test_hash] == 13.0 +end + +@testset "Automatic cleanup integration" begin + times = [0.0, 1.0, 610.0] + time_calls = Ref(0) + mock_time() = (time_calls[] += 1; times[time_calls[]]) + + hash1, hash2 = UInt64(123), UInt64(456) + + @lock GitHub.MUTATION_LOCK begin + empty!(GitHub.NEXT_AVAILABLE) + GitHub.LAST_CLEARED_TIMESTAMP[] = 0.0 + end + + # Add two reservations: hash1->1.0, hash2->2.0 + GitHub.wait_for_mutation_delay(hash1; sleep_fn=t->nothing, time_fn=mock_time) + GitHub.wait_for_mutation_delay(hash2; sleep_fn=t->nothing, time_fn=mock_time) + + # At t=610.0, cutoff=10 triggers cleanup; hash2 (next_available=2.0) should be removed + GitHub.wait_for_mutation_delay(hash1; sleep_fn=t->nothing, time_fn=mock_time) + + @lock GitHub.MUTATION_LOCK begin + @test haskey(GitHub.NEXT_AVAILABLE, hash1) + @test !haskey(GitHub.NEXT_AVAILABLE, hash2) # Removed (2.0 < cutoff 10.0) + @test GitHub.NEXT_AVAILABLE[hash1] == 611.0 # Reserved at t=610.0, so next = 611.0 + @test GitHub.LAST_CLEARED_TIMESTAMP[] == 610.0 + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 83f5f38..5d79a22 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,6 +9,7 @@ using GitHub.Checks include("event_tests.jl") include("read_only_api_tests.jl") include("auth_tests.jl") + include("retries.jl") @testset "SSH keygen" begin pubkey, privkey = GitHub.genkeys(keycomment="GitHub.jl")