feat: add rate limit statistics (#343)

Mantisus · web-flow · commit f35c68ff824c · 2025-02-04T11:46:38.000+01:00
### Description - Add `Statistics` to gather HTTP rate limit errors - The linked issue is in the SDK, but the implementation belongs in the client ### Issues - apify/apify-sdk-python#318 ### Testing - Add tests for `Statistics`
diff --git a/src/apify_client/_http_client.py b/src/apify_client/_http_client.py
@@ -14,6 +14,7 @@
 
 from apify_client._errors import ApifyApiError, InvalidResponseBodyError, is_retryable_error
 from apify_client._logging import log_context, logger_name
+from apify_client._statistics import Statistics
 from apify_client._utils import retry_with_exp_backoff, retry_with_exp_backoff_async
 
 if TYPE_CHECKING:
@@ -35,6 +36,7 @@ def __init__(
         max_retries: int = 8,
         min_delay_between_retries_millis: int = 500,
         timeout_secs: int = 360,
+        stats: Statistics | None = None,
     ) -> None:
         self.max_retries = max_retries
         self.min_delay_between_retries_millis = min_delay_between_retries_millis
@@ -59,6 +61,8 @@ def __init__(
         self.httpx_client = httpx.Client(headers=headers, follow_redirects=True, timeout=timeout_secs)
         self.httpx_async_client = httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=timeout_secs)
 
+        self.stats = stats or Statistics()
+
     @staticmethod
     def _maybe_parse_response(response: httpx.Response) -> Any:
         if response.status_code == HTTPStatus.NO_CONTENT:
@@ -143,6 +147,8 @@ def call(
         log_context.method.set(method)
         log_context.url.set(url)
 
+        self.stats.calls += 1
+
         if stream and parse_response:
             raise ValueError('Cannot stream response and parse it at the same time!')
 
@@ -153,6 +159,9 @@ def call(
         def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response:
             log_context.attempt.set(attempt)
             logger.debug('Sending request')
+
+            self.stats.requests += 1
+
             try:
                 request = httpx_client.build_request(
                     method=method,
@@ -177,6 +186,9 @@ def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response:
 
                     return response
 
+                if response.status_code == HTTPStatus.TOO_MANY_REQUESTS:
+                    self.stats.add_rate_limit_error(attempt)
+
             except Exception as e:
                 logger.debug('Request threw exception', exc_info=e)
                 if not is_retryable_error(e):
@@ -217,6 +229,8 @@ async def call(
         log_context.method.set(method)
         log_context.url.set(url)
 
+        self.stats.calls += 1
+
         if stream and parse_response:
             raise ValueError('Cannot stream response and parse it at the same time!')
 
@@ -251,6 +265,9 @@ async def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response
 
                     return response
 
+                if response.status_code == HTTPStatus.TOO_MANY_REQUESTS:
+                    self.stats.add_rate_limit_error(attempt)
+
             except Exception as e:
                 logger.debug('Request threw exception', exc_info=e)
                 if not is_retryable_error(e):
diff --git a/src/apify_client/_statistics.py b/src/apify_client/_statistics.py
@@ -0,0 +1,27 @@
+from collections import defaultdict
+from dataclasses import dataclass, field
+
+
+@dataclass
+class Statistics:
+    """Statistics about API client usage and rate limit errors."""
+
+    calls: int = 0
+    """Total number of API method calls made by the client."""
+
+    requests: int = 0
+    """Total number of HTTP requests sent, including retries."""
+
+    rate_limit_errors: defaultdict[int, int] = field(default_factory=lambda: defaultdict(int))
+    """Mapping of 0-based attempt index to the number of rate limit (429) errors recorded for that attempt."""
+
+    def add_rate_limit_error(self, attempt: int) -> None:
+        """Record one rate limit error for the given attempt.
+
+        Args:
+            attempt: 1-based attempt number, stored under key `attempt - 1`; values below 1 raise ValueError.
+        """
+        if attempt < 1:
+            raise ValueError('Attempt must be greater than 0')
+
+        self.rate_limit_errors[attempt - 1] += 1
diff --git a/src/apify_client/client.py b/src/apify_client/client.py
@@ -3,6 +3,7 @@
 from apify_shared.utils import ignore_docs
 
 from apify_client._http_client import HTTPClient, HTTPClientAsync
+from apify_client._statistics import Statistics
 from apify_client.clients import (
     ActorClient,
     ActorClientAsync,
@@ -126,11 +127,13 @@ def __init__(
             timeout_secs=timeout_secs,
         )
 
+        self.stats = Statistics()
         self.http_client = HTTPClient(
             token=token,
             max_retries=self.max_retries,
             min_delay_between_retries_millis=self.min_delay_between_retries_millis,
             timeout_secs=self.timeout_secs,
+            stats=self.stats,
         )
 
     def actor(self, actor_id: str) -> ActorClient:
diff --git a/tests/unit/test_statistics.py b/tests/unit/test_statistics.py
@@ -0,0 +1,44 @@
+import pytest
+
+from apify_client._statistics import Statistics
+
+
+@pytest.mark.parametrize(
+    ('attempts', 'expected_errors'),
+    [
+        pytest.param([1], {0: 1}, id='single error'),
+        pytest.param([1, 5], {0: 1, 4: 1}, id='two single errors'),
+        pytest.param([5, 1], {0: 1, 4: 1}, id='two single errors reversed'),
+        pytest.param([3, 5, 1], {0: 1, 2: 1, 4: 1}, id='three single errors'),
+        pytest.param([1, 5, 3], {0: 1, 2: 1, 4: 1}, id='three single errors reordered'),
+        pytest.param([2, 1, 2, 1, 5, 2, 1], {0: 3, 1: 3, 4: 1}, id='multiple errors per attempt'),
+    ],
+)
+def test_add_rate_limit_error(attempts: list[int], expected_errors: dict[int, int]) -> None:
+    """Test that add_rate_limit_error counts errors under the 0-based attempt index, regardless of call order."""
+    stats = Statistics()
+    for attempt in attempts:
+        stats.add_rate_limit_error(attempt)
+    assert stats.rate_limit_errors == expected_errors
+
+
+def test_add_rate_limit_error_invalid_attempt() -> None:
+    """Test that add_rate_limit_error raises ValueError for an attempt number below 1 (here 0)."""
+    stats = Statistics()
+    with pytest.raises(ValueError, match='Attempt must be greater than 0'):
+        stats.add_rate_limit_error(0)
+
+
+def test_statistics_initial_state() -> None:
+    """Test that a fresh Statistics instance has zeroed counters and no recorded rate limit errors."""
+    stats = Statistics()
+    assert stats.calls == 0
+    assert stats.requests == 0
+    assert stats.rate_limit_errors == {}
+
+
+def test_add_rate_limit_error_type_validation() -> None:
+    """Test that a str attempt raises TypeError (it comes from the `attempt < 1` comparison, not an explicit check)."""
+    stats = Statistics()
+    with pytest.raises(TypeError):
+        stats.add_rate_limit_error('1')  # type: ignore[arg-type]