Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ public final class ConfigGenerator implements Runnable {
.nullable(false)
.initialize(writer -> {
writer.addDependency(SmithyPythonDependency.SMITHY_CORE);
writer.addImport("smithy_core.retries", "SimpleRetryStrategy");
writer.write("self.retry_strategy = retry_strategy or SimpleRetryStrategy()");
writer.addImport("smithy_core.retries", "StandardRetryStrategy");
writer.write("self.retry_strategy = retry_strategy or StandardRetryStrategy()");
})
.build(),
ConfigProperty.builder()
Expand Down
1 change: 1 addition & 0 deletions packages/smithy-aws-core/CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
### Features

* Added a hand-written implementation for the `restJson1` protocol.
* Added a new retry mode `standard` and made it the default retry strategy.
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this considered a feature or breaking change since we're updating a default?


## v0.0.3

Expand Down
6 changes: 3 additions & 3 deletions packages/smithy-core/src/smithy_core/aio/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ async def _retry[I: SerializeableShape, O: DeserializeableShape](
return await self._handle_attempt(call, request_context, request_future)

retry_strategy = call.retry_strategy
retry_token = retry_strategy.acquire_initial_retry_token(
retry_token = await retry_strategy.acquire_initial_retry_token(
token_scope=call.retry_scope
)

Expand All @@ -349,7 +349,7 @@ async def _retry[I: SerializeableShape, O: DeserializeableShape](

if isinstance(output_context.response, Exception):
try:
retry_strategy.refresh_retry_token_for_retry(
retry_token = await retry_strategy.refresh_retry_token_for_retry(
token_to_renew=retry_token,
error=output_context.response,
)
Expand All @@ -364,7 +364,7 @@ async def _retry[I: SerializeableShape, O: DeserializeableShape](

await seek(request_context.transport_request.body, 0)
else:
retry_strategy.record_success(token=retry_token)
await retry_strategy.record_success(token=retry_token)
return output_context

async def _handle_attempt[I: SerializeableShape, O: DeserializeableShape](
Expand Down
6 changes: 3 additions & 3 deletions packages/smithy-core/src/smithy_core/interfaces/retries.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class RetryStrategy(Protocol):
max_attempts: int
"""Upper limit on total attempt count (initial attempt plus retries)."""

def acquire_initial_retry_token(
async def acquire_initial_retry_token(
self, *, token_scope: str | None = None
) -> RetryToken:
"""Called before any retries (for the first attempt at the operation).
Expand All @@ -74,7 +74,7 @@ def acquire_initial_retry_token(
"""
...

def refresh_retry_token_for_retry(
async def refresh_retry_token_for_retry(
self, *, token_to_renew: RetryToken, error: Exception
) -> RetryToken:
"""Replace an existing retry token from a failed attempt with a new token.
Expand All @@ -91,7 +91,7 @@ def refresh_retry_token_for_retry(
"""
...

def record_success(self, *, token: RetryToken) -> None:
async def record_success(self, *, token: RetryToken) -> None:
"""Return token after successful completion of an operation.

Upon successful completion of the operation, a user calls this function to
Expand Down
160 changes: 157 additions & 3 deletions packages/smithy-core/src/smithy_core/retries.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
import asyncio
import random
from collections.abc import Callable
from dataclasses import dataclass
Expand Down Expand Up @@ -204,7 +205,7 @@ def __init__(
self.backoff_strategy = backoff_strategy or ExponentialRetryBackoffStrategy()
self.max_attempts = max_attempts

def acquire_initial_retry_token(
async def acquire_initial_retry_token(
self, *, token_scope: str | None = None
) -> SimpleRetryToken:
"""Called before any retries (for the first attempt at the operation).
Expand All @@ -214,7 +215,7 @@ def acquire_initial_retry_token(
retry_delay = self.backoff_strategy.compute_next_backoff_delay(0)
return SimpleRetryToken(retry_count=0, retry_delay=retry_delay)

def refresh_retry_token_for_retry(
async def refresh_retry_token_for_retry(
self,
*,
token_to_renew: retries_interface.RetryToken,
Expand All @@ -240,5 +241,158 @@ def refresh_retry_token_for_retry(
else:
raise RetryError(f"Error is not retryable: {error}") from error

def record_success(self, *, token: retries_interface.RetryToken) -> None:
async def record_success(self, *, token: retries_interface.RetryToken) -> None:
"""Not used by this retry strategy."""


@dataclass(kw_only=True)
class StandardRetryToken:
retry_count: int
"""Retry count is the total number of attempts minus the initial attempt."""

retry_delay: float
"""Delay in seconds to wait before the retry attempt."""

quota_consumed: int = 0
"""The total amount of quota consumed."""

last_quota_acquired: int = 0
"""The amount of last quota acquired."""


class StandardRetryStrategy(retries_interface.RetryStrategy):
def __init__(self, *, max_attempts: int = 3):
"""Standard retry strategy using truncated binary exponential backoff with full
jitter.

:param max_attempts: Upper limit on total number of attempts made, including
initial attempt and retries.
"""
self.backoff_strategy = ExponentialRetryBackoffStrategy(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know the max_backoff is 20 seconds and it would be a bit redundant to specify that here, but I feel like that is the safer thing to do. If for some reason we decided that a different default is better for non-AWS clients, I don't want us to accidentally change the value used here.

backoff_scale_value=1,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit - Not really relevant to this PR, but we should consider adding some simple validation for these values on initialization (e.g., ensure they're positive integers).

jitter_type=ExponentialBackoffJitterType.FULL,
)
self.max_attempts = max_attempts
self._retry_quota = StandardRetryQuota()

async def acquire_initial_retry_token(
    self, *, token_scope: str | None = None
) -> StandardRetryToken:
    """Create the token for the initial attempt of an operation.

    The returned token has a retry count of zero and a delay computed by the
    backoff strategy for attempt ``0``. No retry quota is acquired here.

    :param token_scope: Accepted for interface compatibility; this retry
        strategy does not use it.
    """
    return StandardRetryToken(
        retry_count=0,
        retry_delay=self.backoff_strategy.compute_next_backoff_delay(0),
    )

async def refresh_retry_token_for_retry(
self,
*,
token_to_renew: StandardRetryToken,
Copy link
Contributor

@jonathan343 jonathan343 Aug 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is going to result in the following typechecker errors:

/.../smithy-python/packages/smithy-core/src/smithy_core/retries.py
  /.../smithy-python/packages/smithy-core/src/smithy_core/retries.py:288:15 - error: Method "refresh_retry_token_for_retry" overrides class "RetryStrategy" in an incompatible manner
    Keyword parameter "token_to_renew" type mismatch: base parameter is type "RetryToken", override parameter is type "StandardRetryToken"
      "RetryToken" is not assignable to "StandardRetryToken" (reportIncompatibleMethodOverride)
  /.../smithy-python/packages/smithy-core/src/smithy_core/retries.py:331:15 - error: Method "record_success" overrides class "RetryStrategy" in an incompatible manner
    Keyword parameter "token" type mismatch: base parameter is type "RetryToken", override parameter is type "StandardRetryToken"
      "RetryToken" is not assignable to "StandardRetryToken" (reportIncompatibleMethodOverride)

I think we need to be using retries_interface.RetryToken here instead of StandardRetryToken.

Suggested change
token_to_renew: StandardRetryToken,
token_to_renew: retries_interface.RetryToken,

Making this change alone will result in even more typing errors because StandardRetryToken has extra attributes. I think we have the following options:

  • Use typing.cast to tell the typechecker we're using StandardRetryToken
  • Add a runtime check to validate StandardRetryToken is being used

error: Exception,
) -> StandardRetryToken:
"""Replace an existing retry token from a failed attempt with a new token.

This retry strategy always returns a token until the attempt count stored in
the new token exceeds the ``max_attempts`` value.

:param token_to_renew: The token used for the previous failed attempt.
:param error: The error that triggered the need for a retry.
:raises RetryError: If no further retry attempts are allowed.
"""
if isinstance(error, retries_interface.ErrorRetryInfo) and error.is_retry_safe:
retry_count = token_to_renew.retry_count + 1
if retry_count >= self.max_attempts:
raise RetryError(
f"Reached maximum number of allowed attempts: {self.max_attempts}"
) from error

# Acquire additional quota for this retry attempt
# (may raise a RetryError if none is available)
quota_acquired = await self._retry_quota.acquire(error=error)
total_quota = token_to_renew.quota_consumed + quota_acquired

if error.retry_after is not None:
retry_delay = error.retry_after
else:
retry_delay = self.backoff_strategy.compute_next_backoff_delay(
retry_count
)

return StandardRetryToken(
retry_count=retry_count,
retry_delay=retry_delay,
quota_consumed=total_quota,
last_quota_acquired=quota_acquired,
)
else:
raise RetryError(f"Error is not retryable: {error}") from error

async def record_success(self, *, token: retries_interface.RetryToken) -> None:
    """Return quota to the retry quota after an operation succeeds.

    Releases the amount stored in ``token.last_quota_acquired`` back to this
    strategy's retry quota.

    :param token: The token used for the successful attempt. Must be a
        :py:class:`StandardRetryToken`.
    :raises TypeError: If ``token`` is not a :py:class:`StandardRetryToken`.
    """
    # The parameter is typed as the interface's ``RetryToken`` so this override
    # stays compatible with ``RetryStrategy``; narrow at runtime before reading
    # the quota-specific field that only ``StandardRetryToken`` carries.
    if not isinstance(token, StandardRetryToken):
        raise TypeError(
            "StandardRetryStrategy requires a StandardRetryToken, "
            f"got {type(token).__name__}"
        )
    await self._retry_quota.release(release_amount=token.last_quota_acquired)


class StandardRetryQuota:
"""Retry quota used by :py:class:`StandardRetryStrategy`."""

INITIAL_RETRY_TOKENS = 500
RETRY_COST = 5
NO_RETRY_INCREMENT = 1
TIMEOUT_RETRY_COST = 10

def __init__(self):
self._max_capacity = self.INITIAL_RETRY_TOKENS
self._available_capacity = self.INITIAL_RETRY_TOKENS
self._lock = asyncio.Lock()

async def acquire(self, *, error: Exception) -> int:
"""Attempt to acquire a certain amount of capacity.

If there's no sufficient amount of capacity available, raise an exception.
Otherwise, we return the amount of capacity successfully allocated.
"""
# TODO: update `is_timeout` when `is_timeout_error` is implemented
is_timeout = False
capacity_amount = self.TIMEOUT_RETRY_COST if is_timeout else self.RETRY_COST

async with self._lock:
if capacity_amount > self._available_capacity:
raise RetryError("Retry quota exceeded")
self._available_capacity -= capacity_amount
return capacity_amount

async def release(self, *, release_amount: int) -> None:
"""Release capacity back to the retry quota.

The capacity being released will be truncated if necessary to ensure the max
capacity is never exceeded.
"""
increment = self.NO_RETRY_INCREMENT if release_amount == 0 else release_amount

if self._available_capacity == self._max_capacity:
return

async with self._lock:
self._available_capacity = min(
self._available_capacity + increment, self._max_capacity
)


class RetryStrategyMode(Enum):
    """The retry strategies that can be selected by name.

    Each member's value is the lowercase string form of the mode.
    """

    SIMPLE = "simple"
    STANDARD = "standard"


# Lookup table from each retry mode to the strategy class (not an instance)
# that implements it.
RETRY_MODE_MAP = {
    RetryStrategyMode.SIMPLE: SimpleRetryStrategy,
    RetryStrategyMode.STANDARD: StandardRetryStrategy,
}
Loading