diff --git a/lib/tesla/middleware/retry.ex b/lib/tesla/middleware/retry.ex index c6d787cc..b3f5ba19 100644 --- a/lib/tesla/middleware/retry.ex +++ b/lib/tesla/middleware/retry.ex @@ -52,6 +52,10 @@ defmodule Tesla.Middleware.Retry do the context: options + `:retries` (defaults to a match on `{:error, _reason}`) - `:jitter_factor` - additive noise proportionality constant (float between 0 and 1, defaults to 0.2) + - `:use_retry_after_header` - whether to use the Retry-After header to determine the minimum + delay before the next retry. If the delay from the header exceeds max_delay, no further + retries are attempted. Invalid Retry-After headers are ignored. + (boolean, defaults to false) """ @behaviour Tesla.Middleware @@ -60,7 +64,8 @@ defmodule Tesla.Middleware.Retry do delay: 50, max_retries: 5, max_delay: 5_000, - jitter_factor: 0.2 + jitter_factor: 0.2, + use_retry_after_header: false ] @impl Tesla.Middleware @@ -73,23 +78,21 @@ defmodule Tesla.Middleware.Retry do max_retries: integer_opt!(opts, :max_retries, 0), max_delay: integer_opt!(opts, :max_delay, 1), should_retry: should_retry_opt!(opts), - jitter_factor: float_opt!(opts, :jitter_factor, 0, 1) + jitter_factor: float_opt!(opts, :jitter_factor, 0, 1), + use_retry_after_header: boolean_opt!(opts, :use_retry_after_header) } retry(env, next, context) end - # If we have max retries set to 0 don't retry defp retry(env, next, %{max_retries: 0}), do: Tesla.run(env, next) - # If we're on our last retry then just run and don't handle the error defp retry(env, next, %{max_retries: max, retries: max} = context) do env |> put_retry_count_opt(context) |> Tesla.run(next) end - # Otherwise we retry if we get a retriable error defp retry(env, next, context) do res = env @@ -98,7 +101,12 @@ defmodule Tesla.Middleware.Retry do {:arity, should_retry_arity} = :erlang.fun_info(context.should_retry, :arity) + context = Map.put(context, :retry_after, retry_after(res, context)) + cond do + context.retry_after != nil and context.max_delay < context.retry_after -> + res + should_retry_arity == 1 and context.should_retry.(res) -> do_retry(env, next, context) @@ -110,14 +118,105 @@ defmodule Tesla.Middleware.Retry do end end + defp retry_after({_, %Tesla.Env{} = env}, %{use_retry_after_header: true}) do + case Tesla.get_header(env, "retry-after") do + nil -> + nil + + header -> + case retry_delay_in_ms(header) do + {:ok, delay_ms} -> delay_ms + {:error, _} -> nil + end + end + end + + defp retry_after(_res, _context) do + nil + end + + # Credits to @wojtekmach + defp retry_delay_in_ms(delay_value) do + case Integer.parse(delay_value) do + {seconds, ""} -> + {:ok, :timer.seconds(seconds)} + + :error -> + case parse_http_datetime(delay_value) do + {:ok, date_time} -> + {:ok, + date_time + |> DateTime.diff(DateTime.utc_now(), :millisecond) + |> max(0)} + + {:error, _} = error -> + error + end + end + end + + @month_numbers %{ + "Jan" => "01", + "Feb" => "02", + "Mar" => "03", + "Apr" => "04", + "May" => "05", + "Jun" => "06", + "Jul" => "07", + "Aug" => "08", + "Sep" => "09", + "Oct" => "10", + "Nov" => "11", + "Dec" => "12" + } + + defp parse_http_datetime(datetime) do + case String.split(datetime, " ") do + [_day_of_week, day, month, year, time, "GMT"] -> + case @month_numbers[month] do + nil -> + {:error, + "cannot parse \"retry-after\" header value #{inspect(datetime)} as datetime, reason: invalid month"} + + month_number -> + date = year <> "-" <> month_number <> "-" <> day + + case DateTime.from_iso8601(date <> " " <> time <> "Z") do + {:ok, valid_datetime, 0} -> + {:ok, valid_datetime} + + {:error, reason} -> + {:error, + "cannot parse \"retry-after\" header value #{inspect(datetime)} as datetime, reason: #{reason}"} + end + end + + _ -> + {:error, + "cannot parse \"retry-after\" header value #{inspect(datetime)} as datetime, reason: header is not in HTTP-date or integer format"} + end + end + defp do_retry(env, next, context) do - backoff(context.max_delay, context.delay, context.retries, context.jitter_factor) + case context.retry_after do + nil -> + exponential_backoff( + context.max_delay, + context.delay, + context.retries, + context.jitter_factor + ) + + retry_after -> + retry_after_with_jitter(context.max_delay, retry_after, context.jitter_factor) + end + context = update_in(context, [:retries], &(&1 + 1)) retry(env, next, context) end # Exponential backoff with jitter - defp backoff(cap, base, attempt, jitter_factor) do + defp exponential_backoff(cap, base, attempt, jitter_factor) do factor = Bitwise.bsl(1, attempt) max_sleep = min(cap, base * factor) @@ -140,6 +239,16 @@ defmodule Tesla.Middleware.Retry do %{env | opts: opts} end + @spec retry_after_with_jitter(any(), integer(), number()) :: :ok + def retry_after_with_jitter(cap, retry_after, jitter_factor) do + # Ensures that the added jitter never exceeds the max delay + max = min(cap, retry_after * (1 + jitter_factor)) + + jitter = trunc((max - retry_after) * :rand.uniform()) + + :timer.sleep(retry_after + jitter) + end + defp integer_opt!(opts, key, min) do case Keyword.fetch(opts, key) do {:ok, value} when is_integer(value) and value >= min -> value @@ -156,6 +265,14 @@ defmodule Tesla.Middleware.Retry do end end + defp boolean_opt!(opts, key) do + case Keyword.fetch(opts, key) do + {:ok, value} when is_boolean(value) -> value + {:ok, invalid} -> invalid_boolean(key, invalid) + :error -> @defaults[key] + end + end + defp should_retry_opt!(opts) do case Keyword.get(opts, :should_retry, &match?({:error, _}, &1)) do should_retry_fun when is_function(should_retry_fun, 1) -> @@ -180,6 +297,10 @@ defmodule Tesla.Middleware.Retry do ) end + defp invalid_boolean(key, value) do + raise(ArgumentError, "expected :#{key} to be a boolean, got #{inspect(value)}") + end + defp invalid_should_retry_fun(value) do raise( ArgumentError, diff --git a/test/tesla/middleware/retry_test.exs b/test/tesla/middleware/retry_test.exs index 90a2bc6e..47ed828a 100644 --- a/test/tesla/middleware/retry_test.exs +++ b/test/tesla/middleware/retry_test.exs @@ -2,23 +2,146 @@ defmodule Tesla.Middleware.RetryTest do use ExUnit.Case, async: false defmodule LaggyAdapter do - def start_link, do: Agent.start_link(fn -> 0 end, name: __MODULE__) - def reset(), do: Agent.update(__MODULE__, fn _ -> 0 end) + def start_link, + do: + Agent.start_link(fn -> %{retries: 0, start_time: DateTime.utc_now()} end, + name: __MODULE__ + ) + + def reset(), + do: Agent.update(__MODULE__, fn _ -> %{retries: 0, start_time: DateTime.utc_now()} end) def call(env, _opts) do - Agent.get_and_update(__MODULE__, fn retries -> + Agent.get_and_update(__MODULE__, fn %{retries: retries, start_time: start_time} = state -> + ms_elapsed = DateTime.diff(DateTime.utc_now(), start_time, :millisecond) + response = case env.url do - "/ok" -> {:ok, env} - "/maybe" when retries == 2 -> {:error, :nxdomain} - "/maybe" when retries < 5 -> {:error, :econnrefused} - "/maybe" -> {:ok, env} - "/nope" -> {:error, :econnrefused} - "/retry_status" when retries < 5 -> {:ok, %{env | status: 500}} - "/retry_status" -> {:ok, %{env | status: 200}} + "/ok" -> + {:ok, env} + + "/maybe" when retries == 2 -> + {:error, :nxdomain} + + "/maybe" when retries < 5 -> + {:error, :econnrefused} + + "/maybe" -> + {:ok, env} + + "/nope" -> + {:error, :econnrefused} + + "/retry_status" when retries < 5 -> + {:ok, %{env | status: 500}} + + "/retry_status" -> + {:ok, %{env | status: 200}} + + "/retry_after_seconds" when ms_elapsed < 1000 -> + {:ok, %{env | status: 429, headers: [{"retry-after", "2"} | env.headers]}} + + "/retry_after_seconds" -> + {:ok, %{env | status: 200}} + + "/retry_after_date" when ms_elapsed < 1000 -> + {:ok, + %{ + env + | status: 429, + headers: [ + {"retry-after", + Calendar.strftime( + DateTime.add(start_time, 2, :second), + "%a, %d %b %Y %H:%M:%S GMT" + )} + | env.headers + ] + }} + + "/retry_after_date" -> + {:ok, %{env | status: 200}} + + "/retry_after_invalid_format" when retries < 5 -> + {:ok, %{env | status: 429, headers: [{"retry-after", "foo"} | env.headers]}} + + "/retry_after_invalid_format" -> + {:ok, %{env | status: 200}} + + "/retry_after_invalid_month" when retries < 5 -> + {:ok, + %{ + env + | status: 429, + headers: [ + {"retry-after", + Calendar.strftime( + DateTime.add(start_time, 2, :second), + "%a, %d Foo %Y %H:%M:%S GMT" + )} + | env.headers + ] + }} + + "/retry_after_invalid_month" -> + {:ok, %{env | status: 200}} + + "/retry_after_invalid_day" when retries < 5 -> + {:ok, + %{ + env + | status: 429, + headers: [ + {"retry-after", + Calendar.strftime( + DateTime.add(start_time, 2, :second), + "%a, Foo %b %Y %H:%M:%S GMT" + )} + | env.headers + ] + }} + + "/retry_after_invalid_day" -> + {:ok, %{env | status: 200}} + + "/retry_after_invalid_year" when retries < 5 -> + {:ok, + %{ + env + | status: 429, + headers: [ + {"retry-after", + Calendar.strftime( + DateTime.add(start_time, 2, :second), + "%a, %d %b Foo %H:%M:%S GMT" + )} + | env.headers + ] + }} + + "/retry_after_invalid_year" -> + {:ok, %{env | status: 200}} + + "/retry_after_invalid_time" when retries < 5 -> + {:ok, + %{ + env + | status: 429, + headers: [ + {"retry-after", + Calendar.strftime( + DateTime.add(start_time, 2, :second), + "%a, %d %b %Y Foo:Bar:%S GMT" + )} + | env.headers + ] + }} + + "/retry_after_invalid_time" -> + {:ok, %{env | status: 200}} end - {response, retries + 1} + {response, %{state | retries: retries + 1}} end) end end @@ -60,6 +183,24 @@ defmodule Tesla.Middleware.RetryTest do adapter LaggyAdapter end + defmodule ClientUsingRetryAfterHeader do + use Tesla + + plug Tesla.Middleware.Retry, + delay: 10, + max_retries: 10, + use_retry_after_header: true, + should_retry: fn + {:ok, %{status: status}}, _env, _context when status in [429] -> + true + + {:ok, _reason}, _env, _context -> + false + end + + adapter LaggyAdapter + end + setup_all do {:ok, _pid} = LaggyAdapter.start_link() :ok @@ -108,6 +249,117 @@ defmodule Tesla.Middleware.RetryTest do assert {:error, :nxdomain} = ClientWithShouldRetryFunction.put("/maybe", "payload") end + test "use Retry-After header when it is a integer number of seconds" do + assert {:ok, %Tesla.Env{url: "/retry_after_seconds", method: :get, status: 200}} = + ClientUsingRetryAfterHeader.get("/retry_after_seconds") + + assert Agent.get(LaggyAdapter, fn %{retries: retries} -> retries end) == 2 + end + + test "use Retry-After header when it is a HTTP Date string" do + assert {:ok, %Tesla.Env{url: "/retry_after_date", method: :get, status: 200}} = + ClientUsingRetryAfterHeader.get("/retry_after_date") + + assert Agent.get(LaggyAdapter, fn %{retries: retries} -> retries end) == 2 + end + + test "ignore Retry-After header if it is not in an expected format" do + assert {:ok, %Tesla.Env{url: "/retry_after_invalid_format", method: :get, status: 200}} = + ClientUsingRetryAfterHeader.get("/retry_after_invalid_format") + + assert Agent.get(LaggyAdapter, fn %{retries: retries} -> retries end) == 6 + end + + test "ignore Retry-After header if it has an invalid month" do + assert {:ok, %Tesla.Env{url: "/retry_after_invalid_month", method: :get, status: 200}} = + ClientUsingRetryAfterHeader.get("/retry_after_invalid_month") + + assert Agent.get(LaggyAdapter, fn %{retries: retries} -> retries end) == 6 + end + + test "ignore Retry-After header if it has an invalid day" do + assert {:ok, %Tesla.Env{url: "/retry_after_invalid_day", method: :get, status: 200}} = + ClientUsingRetryAfterHeader.get("/retry_after_invalid_day") + + assert Agent.get(LaggyAdapter, fn %{retries: retries} -> retries end) == 6 + end + + test "ignore Retry-After header if it has an invalid year" do + assert {:ok, %Tesla.Env{url: "/retry_after_invalid_year", method: :get, status: 200}} = + ClientUsingRetryAfterHeader.get("/retry_after_invalid_year") + + assert Agent.get(LaggyAdapter, fn %{retries: retries} -> retries end) == 6 + end + + test "ignore Retry-After header if it has an invalid time" do + assert {:ok, %Tesla.Env{url: "/retry_after_invalid_time", method: :get, status: 200}} = + ClientUsingRetryAfterHeader.get("/retry_after_invalid_time") + + assert Agent.get(LaggyAdapter, fn %{retries: retries} -> retries end) == 6 + end + + test "do not retry if Retry-After delay exceeds max delay" do + defmodule ClientUsingRetryAfterHeaderWithMaxDelay do + use Tesla + + plug Tesla.Middleware.Retry, + delay: 10, + max_delay: 500, + max_retries: 10, + use_retry_after_header: true, + should_retry: fn + {:ok, %{status: status}}, _env, _context when status in [429] -> + true + + {:ok, _reason}, _env, _context -> + false + end + + adapter LaggyAdapter + end + + assert {:ok, %Tesla.Env{url: "/retry_after_seconds", method: :get, status: 429}} = + ClientUsingRetryAfterHeaderWithMaxDelay.get("/retry_after_seconds") + + assert Agent.get(LaggyAdapter, fn %{retries: retries} -> retries end) == 1 + end + + test "jitter doesn't allow delay to be shorter than specified by Retry-After heade or larger than max delay" do + defmodule ClientUsingRetryAfterHeaderWithHighJitter do + use Tesla + + plug Tesla.Middleware.Retry, + delay: 10, + max_delay: 2001, + max_retries: 1, + jitter_factor: 0.9999, + use_retry_after_header: true, + should_retry: fn + {:ok, %{status: status}}, _env, _context when status in [429] -> + true + + {:ok, _reason}, _env, _context -> + false + end + + adapter LaggyAdapter + end + + assert {:ok, %Tesla.Env{url: "/retry_after_seconds", method: :get, status: 200}} = + ClientUsingRetryAfterHeaderWithHighJitter.get("/retry_after_seconds") + + finish_time = :os.system_time(:millisecond) + + # need to allow some time for the request handling; should be small relative to max_delay to minimize probability of false negatives + allowed_execution_ms = 100 + + %{retries: retries, start_time: start_time} = Agent.get(LaggyAdapter, fn state -> state end) + + assert retries == 2 + + assert finish_time < DateTime.to_unix(start_time, :millisecond) + 2001 + allowed_execution_ms + end + defmodule DefunctClient do use Tesla @@ -231,4 +483,18 @@ defmodule Tesla.Middleware.RetryTest do ClientWithShouldRetryArity2.get("/ok") end end + + test "ensures use_retry_after_header is a boolean" do + defmodule ClientWithStringUseRetryAfterHeader do + use Tesla + plug Tesla.Middleware.Retry, use_retry_after_header: 1 + adapter LaggyAdapter + end + + assert_raise ArgumentError, + ~r/expected :use_retry_after_header to be a boolean, got 1/, + fn -> + ClientWithStringUseRetryAfterHeader.get("/ok") + end + end end