From 42d7deaa04d9f49b9e4c8716c30285bc6e214374 Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Thu, 14 Dec 2023 17:23:59 -0600 Subject: [PATCH 1/3] Refactor `retry` to include exponential backoff and jitter --- indico_toolkit/client.py | 2 +- indico_toolkit/indico_wrapper/__init__.py | 2 +- indico_toolkit/indico_wrapper/download.py | 6 +- .../indico_wrapper/indico_wrapper.py | 4 +- indico_toolkit/retry.py | 77 ++++++++++++++----- tests/test_retry.py | 43 +++++++---- 6 files changed, 89 insertions(+), 45 deletions(-) diff --git a/indico_toolkit/client.py b/indico_toolkit/client.py index 000ccb7c..ec449633 100644 --- a/indico_toolkit/client.py +++ b/indico_toolkit/client.py @@ -4,7 +4,7 @@ from indico_toolkit.retry import retry -@retry((IndicoRequestError, ConnectionError)) +@retry(IndicoRequestError, ConnectionError) def create_client( host: str, api_token_path: str = None, diff --git a/indico_toolkit/indico_wrapper/__init__.py b/indico_toolkit/indico_wrapper/__init__.py index b1bf3b33..7c993624 100644 --- a/indico_toolkit/indico_wrapper/__init__.py +++ b/indico_toolkit/indico_wrapper/__init__.py @@ -1,4 +1,4 @@ -from .indico_wrapper import IndicoWrapper, retry +from .indico_wrapper import IndicoWrapper from .workflow import Workflow from .dataset import Datasets from .reviewer import Reviewer diff --git a/indico_toolkit/indico_wrapper/download.py b/indico_toolkit/indico_wrapper/download.py index af44f7d5..cadb11b2 100644 --- a/indico_toolkit/indico_wrapper/download.py +++ b/indico_toolkit/indico_wrapper/download.py @@ -103,14 +103,14 @@ def _download_pdfs_from_export( return max_files_to_download return export_df.shape[0] - @retry((IndicoRequestError, ConnectionError)) + @retry(IndicoRequestError, ConnectionError) def _download_export(self, export_id: int) -> pd.DataFrame: """ Download a dataframe representation of your dataset export """ return self.client.call(DownloadExport(export_id=export_id)) - @retry((IndicoRequestError, ConnectionError)) + @retry(IndicoRequestError, ConnectionError) def _create_export( self, dataset_id: int, @@ -142,7 +142,7 @@ def _create_export( ) ) - @retry((IndicoRequestError, ConnectionError)) + @retry(IndicoRequestError, ConnectionError) def _retrieve_storage_object(self, url: str): return self.client.call(RetrieveStorageObject(url)) diff --git a/indico_toolkit/indico_wrapper/indico_wrapper.py b/indico_toolkit/indico_wrapper/indico_wrapper.py index 6ad751e5..752d2d76 100644 --- a/indico_toolkit/indico_wrapper/indico_wrapper.py +++ b/indico_toolkit/indico_wrapper/indico_wrapper.py @@ -68,7 +68,7 @@ def train_model( ) ) - @retry((IndicoRequestError, ConnectionError)) + @retry(IndicoRequestError, ConnectionError) def get_storage_object(self, storage_url: str): return self.client.call(RetrieveStorageObject(storage_url)) @@ -78,7 +78,7 @@ def create_storage_urls(self, file_paths: List[str]) -> List[str]: def get_job_status(self, job_id: int, wait: bool = True, timeout: float = None): return self.client.call(JobStatus(id=job_id, wait=wait, timeout=timeout)) - @retry((IndicoRequestError, ConnectionError)) + @retry(IndicoRequestError, ConnectionError) def graphQL_request(self, graphql_query: str, variables: dict = None): return self.client.call( GraphQLRequest(query=graphql_query, variables=variables) diff --git a/indico_toolkit/retry.py b/indico_toolkit/retry.py index 43111f01..4d90811a 100644 --- a/indico_toolkit/retry.py +++ b/indico_toolkit/retry.py @@ -1,27 +1,62 @@ -from functools import wraps import time +from functools import wraps +from random import random +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Callable + from typing import TypeVar + + ReturnType = TypeVar("ReturnType") -def retry(exceptions, num_retries=5, wait=0.5): +class MaxRetriesExceeded(Exception): """ - Decorator for retrying functions after specified exceptions are raised - Args: - exceptions (Exception or Tuple[Exception]): exceptions that should be retried on - wait (float): time in seconds to wait before retrying - num_retries (int): the number of times to retry the wrapped function + Raised when a function has retried more than `count` number of times. """ - def retry_decorator(fn): - @wraps(fn) - def retry_func(*args, **kwargs): - retries = 0 - while True: + + +def retry( + *errors: "type[Exception]", + count: int = 4, + wait: float = 1, + backoff: float = 4, + jitter: float = 0.5, +) -> "Callable[[Callable[..., ReturnType]], Callable[..., ReturnType]]": + """ + Decorate a function to automatically retry when it raises specific errors, + apply exponential backoff and jitter to the wait time, + and raise `MaxRetriesExceeded` after it retries too many times. + + By default, the decorated method will be retried up to 4 times over the course of + ~2 minutes (waiting 1, 4, 16, and 64 seconds; plus up to 50% jitter) + before raising `MaxRetriesExceeded` from the last error. + + Arguments: + errors: Retry the function when it raises one of these errors. + count: Retry the function this many times before raising `MaxRetriesExceeded`. + wait: Wait this many seconds after the first error before retrying. + backoff: Multiply the wait time by this amount for each additional error. + jitter: Add a random amount of time (up to this percent as a decimal) + to the wait time to prevent simultaneous retries. + """ + + def retry_decorator( + function: "Callable[..., ReturnType]", + ) -> "Callable[..., ReturnType]": + @wraps(function) + def retrying_function(*args: object, **kwargs: object) -> "ReturnType": + for times_retried in range(count + 1): try: - return fn(*args, **kwargs) - except exceptions as e: - if retries >= num_retries: - raise e - else: - retries += 1 - time.sleep(wait) - return retry_func - return retry_decorator \ No newline at end of file + return function(*args, **kwargs) + except errors as error: + last_error = error + + if times_retried >= count: + raise MaxRetriesExceeded() from last_error + + time.sleep(wait * backoff**times_retried * (1 + jitter * random())) + + return retrying_function + + return retry_decorator diff --git a/tests/test_retry.py b/tests/test_retry.py index c91fa875..fb4e23e0 100644 --- a/tests/test_retry.py +++ b/tests/test_retry.py @@ -3,21 +3,30 @@ from indico_toolkit import retry -counter = 0 - - -def test_retry_decor(): - @retry(Exception) - def no_exceptions(): - return True - - @retry((RuntimeError, ConnectionError), num_retries=7) - def raises_exceptions(): - global counter - counter +=1 - raise RuntimeError("Test runtime fail") - - assert no_exceptions() - with pytest.raises(RuntimeError): +@retry(Exception) +def no_exceptions(): + return True + + +def test_retry_decorator_returns() -> None: + assert no_exceptions() is True + + +calls = 0 + + +@retry(RuntimeError, count=5, wait=0) +def raises_exceptions(): + global calls + calls += 1 + raise RuntimeError() + + +def test_retry_max_exceeded() -> None: + global calls + calls = 0 + + with pytest.raises(Exception): raises_exceptions() - assert counter == 8 \ No newline at end of file + + assert calls == 6 From 0aa8c9b10038203f5267f80467597c85f628cac3 Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Thu, 14 Dec 2023 17:33:22 -0600 Subject: [PATCH 2/3] Remove retry from packages __all__ to avoid shadowing the module --- indico_toolkit/__init__.py | 1 - indico_toolkit/indico_wrapper/download.py | 3 ++- indico_toolkit/indico_wrapper/indico_wrapper.py | 3 ++- tests/test_retry.py | 4 ++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/indico_toolkit/__init__.py b/indico_toolkit/__init__.py index 4df66ad8..e9eed182 100644 --- a/indico_toolkit/__init__.py +++ b/indico_toolkit/__init__.py @@ -3,4 +3,3 @@ from .errors import * from .client import create_client -from .retry import retry diff --git a/indico_toolkit/indico_wrapper/download.py b/indico_toolkit/indico_wrapper/download.py index cadb11b2..2e5d2759 100644 --- a/indico_toolkit/indico_wrapper/download.py +++ b/indico_toolkit/indico_wrapper/download.py @@ -3,7 +3,8 @@ import pandas as pd from indico.types.export import Export from indico import IndicoClient, IndicoRequestError -from indico_toolkit import retry, ToolkitInputError +from indico_toolkit import ToolkitInputError +from indico_toolkit.retry import retry from indico.queries import ( RetrieveStorageObject, DownloadExport, diff --git a/indico_toolkit/indico_wrapper/indico_wrapper.py b/indico_toolkit/indico_wrapper/indico_wrapper.py index 752d2d76..b6b46206 100644 --- a/indico_toolkit/indico_wrapper/indico_wrapper.py +++ b/indico_toolkit/indico_wrapper/indico_wrapper.py @@ -11,8 +11,9 @@ from indico import IndicoClient from indico.errors import IndicoRequestError +from indico_toolkit import ToolkitInputError +from indico_toolkit.retry import retry from indico_toolkit.types import Predictions -from indico_toolkit import ToolkitStatusError, retry class IndicoWrapper: diff --git a/tests/test_retry.py b/tests/test_retry.py index fb4e23e0..ba9f3105 100644 --- a/tests/test_retry.py +++ b/tests/test_retry.py @@ -1,6 +1,6 @@ import pytest -from indico_toolkit import retry +from indico_toolkit.retry import retry, MaxRetriesExceeded @retry(Exception) @@ -26,7 +26,7 @@ def test_retry_max_exceeded() -> None: global calls calls = 0 - with pytest.raises(Exception): + with pytest.raises(MaxRetriesExceeded): raises_exceptions() assert calls == 6 From 0cf4fc7be850ed3c5e79729e38e6a45b8b994402 Mon Sep 17 00:00:00 2001 From: Michael Welborn Date: Mon, 13 Jan 2025 15:00:14 -0600 Subject: [PATCH 3/3] Support decorating async coroutines with retry --- indico_toolkit/retry.py | 91 ++++++++++++++++++++++++++++++----------- pyproject.toml | 7 ++-- requirements.txt | 5 ++- tests/test_retry.py | 71 +++++++++++++++++++++++++------- 4 files changed, 130 insertions(+), 44 deletions(-) diff --git a/indico_toolkit/retry.py b/indico_toolkit/retry.py index 4d90811a..24362344 100644 --- a/indico_toolkit/retry.py +++ b/indico_toolkit/retry.py @@ -1,13 +1,17 @@ +import asyncio import time from functools import wraps +from inspect import iscoroutinefunction from random import random -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, overload if TYPE_CHECKING: - from collections.abc import Callable - from typing import TypeVar + from collections.abc import Awaitable, Callable + from typing import ParamSpec, TypeVar - ReturnType = TypeVar("ReturnType") + ArgumentsType = ParamSpec("ArgumentsType") + OuterReturnType = TypeVar("OuterReturnType") + InnerReturnType = TypeVar("InnerReturnType") class MaxRetriesExceeded(Exception): @@ -22,14 +26,14 @@ def retry( wait: float = 1, backoff: float = 4, jitter: float = 0.5, -) -> "Callable[[Callable[..., ReturnType]], Callable[..., ReturnType]]": +) -> "Callable[[Callable[ArgumentsType, OuterReturnType]], Callable[ArgumentsType, OuterReturnType]]": # noqa: E501 """ - Decorate a function to automatically retry when it raises specific errors, + Decorate a function or coroutine to retry when it raises specified errors, apply exponential backoff and jitter to the wait time, and raise `MaxRetriesExceeded` after it retries too many times. - By default, the decorated method will be retried up to 4 times over the course of - ~2 minutes (waiting 1, 4, 16, and 64 seconds; plus up to 50% jitter) + By default, the decorated function or coroutine will be retried up to 4 times over + the course of ~2 minutes (waiting 1, 4, 16, and 64 seconds; plus up to 50% jitter) before raising `MaxRetriesExceeded` from the last error. Arguments: @@ -41,22 +45,61 @@ def retry( to the wait time to prevent simultaneous retries. """ + def wait_time(times_retried: int) -> float: + """ + Calculate the sleep time based on number of times retried. + """ + return wait * backoff**times_retried * (1 + jitter * random()) + + @overload + def retry_decorator( + decorated: "Callable[ArgumentsType, Awaitable[InnerReturnType]]", + ) -> "Callable[ArgumentsType, Awaitable[InnerReturnType]]": ... + @overload + def retry_decorator( + decorated: "Callable[ArgumentsType, InnerReturnType]", + ) -> "Callable[ArgumentsType, InnerReturnType]": ... def retry_decorator( - function: "Callable[..., ReturnType]", - ) -> "Callable[..., ReturnType]": - @wraps(function) - def retrying_function(*args: object, **kwargs: object) -> "ReturnType": - for times_retried in range(count + 1): - try: - return function(*args, **kwargs) - except errors as error: - last_error = error - - if times_retried >= count: - raise MaxRetriesExceeded() from last_error - - time.sleep(wait * backoff**times_retried * (1 + jitter * random())) - - return retrying_function + decorated: "Callable[ArgumentsType, InnerReturnType]", + ) -> "Callable[ArgumentsType, Awaitable[InnerReturnType]] | Callable[ArgumentsType, InnerReturnType]": # noqa: E501 + """ + Decorate either a function or coroutine as appropriate. + """ + if iscoroutinefunction(decorated): + + @wraps(decorated) + async def retrying_coroutine( # type: ignore[return] + *args: "ArgumentsType.args", **kwargs: "ArgumentsType.kwargs" + ) -> "InnerReturnType": + for times_retried in range(count + 1): + try: + return await decorated(*args, **kwargs) # type: ignore[no-any-return] + except errors as error: + last_error = error + + if times_retried >= count: + raise MaxRetriesExceeded() from last_error + + await asyncio.sleep(wait_time(times_retried)) + + return retrying_coroutine + else: + + @wraps(decorated) + def retrying_function( # type: ignore[return] + *args: "ArgumentsType.args", **kwargs: "ArgumentsType.kwargs" + ) -> "InnerReturnType": + for times_retried in range(count + 1): + try: + return decorated(*args, **kwargs) + except errors as error: + last_error = error + + if times_retried >= count: + raise MaxRetriesExceeded() from last_error + + time.sleep(wait_time(times_retried)) + + return retrying_function return retry_decorator diff --git a/pyproject.toml b/pyproject.toml index e535cc50..536abcfd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,9 +22,10 @@ requires = [ [tool.flit.metadata.requires-extra] test = [ - "pytest>=5.2.1", - "requests-mock>=1.7.0-7", - "pytest-dependency==0.5.1" + "pytest==8.3.4", + "pytest-asyncio==0.25.2", + "pytest-dependency==0.6.0", + "requests-mock>=1.7.0-7" ] full = [ "PyMuPDF==1.19.6", "spacy>=3.1.4,<4" diff --git a/requirements.txt b/requirements.txt index 8043b984..a19b51b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,8 +2,9 @@ indico-client>=5.1.4 python-dateutil==2.8.1 PyMuPDF==1.19.6 pytz==2021.1 -pytest==6.2.2 -pytest-dependency==0.5.1 +pytest==8.3.4 +pytest-asyncio==0.25.2 +pytest-dependency==0.6.0 black==22.3 plotly==5.2.1 tqdm==4.50.0 diff --git a/tests/test_retry.py b/tests/test_retry.py index ba9f3105..f78c10c7 100644 --- a/tests/test_retry.py +++ b/tests/test_retry.py @@ -3,30 +3,71 @@ from indico_toolkit.retry import retry, MaxRetriesExceeded -@retry(Exception) -def no_exceptions(): - return True +def test_no_errors() -> None: + @retry(Exception) + def no_errors() -> bool: + return True + assert no_errors() -def test_retry_decorator_returns() -> None: - assert no_exceptions() is True +def test_raises_errors() -> None: + calls = 0 + + @retry(RuntimeError, count=4, wait=0) + def raises_errors() -> None: + nonlocal calls + calls += 1 + raise RuntimeError() + + with pytest.raises(MaxRetriesExceeded): + raises_errors() + + assert calls == 5 + + +def test_raises_other_errors() -> None: + calls = 0 -calls = 0 + @retry(RuntimeError, count=4, wait=0) + def raises_errors() -> None: + nonlocal calls + calls += 1 + raise ValueError() + with pytest.raises(ValueError): + raises_errors() -@retry(RuntimeError, count=5, wait=0) -def raises_exceptions(): - global calls - calls += 1 - raise RuntimeError() + assert calls == 1 -def test_retry_max_exceeded() -> None: - global calls +@pytest.mark.asyncio +async def test_raises_errors_async() -> None: calls = 0 + @retry(RuntimeError, count=4, wait=0) + async def raises_errors() -> None: + nonlocal calls + calls += 1 + raise RuntimeError() + with pytest.raises(MaxRetriesExceeded): - raises_exceptions() + await raises_errors() + + assert calls == 5 + + +@pytest.mark.asyncio +async def test_raises_other_errors_async() -> None: + calls = 0 + + @retry(RuntimeError, count=4, wait=0) + async def raises_errors() -> None: + nonlocal calls + calls += 1 + raise ValueError() + + with pytest.raises(ValueError): + await raises_errors() - assert calls == 6 + assert calls == 1