From 98ee277a75fae7f39d76eab8ce4961cca25f79db Mon Sep 17 00:00:00 2001 From: Nathan Thorpe Date: Wed, 19 Apr 2023 11:05:19 -0700 Subject: [PATCH 1/3] catch timeout on upload --- cirro/file_utils.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cirro/file_utils.py b/cirro/file_utils.py index 149d209e..cffb3947 100644 --- a/cirro/file_utils.py +++ b/cirro/file_utils.py @@ -1,8 +1,11 @@ import os +import random +import time from pathlib import Path, PurePath from typing import List, Union from boto3.exceptions import S3UploadFailedError +from botocore.exceptions import ConnectionError from cirro.api.clients import S3Client from cirro.api.models.file import DirectoryStatistics, File @@ -86,11 +89,12 @@ def upload_directory(directory: str, files: List[str], s3_client: S3Client, buck success = True # Catch the upload error - except S3UploadFailedError as e: - + except (S3UploadFailedError, ConnectionError) as e: + delay = random.uniform(0, 60) + retry * 60 # Report the error print(f"Encountered error:\n{str(e)}\n" - f"Retrying ({max_retries - (retry + 1)} attempts remaining)") + f"Retrying in {delay:.0f} seconds ({max_retries - (retry + 1)} attempts remaining)") + time.sleep(delay) if success: break From b6928efb718f445e6c9aca5d143c35e7e5a7d576 Mon Sep 17 00:00:00 2001 From: Nathan Thorpe Date: Wed, 19 Apr 2023 11:19:37 -0700 Subject: [PATCH 2/3] add configurable transfer max retries --- README.md | 4 ++++ cirro/api/config.py | 9 +++++++-- cirro/api/services/file.py | 3 ++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2a9b47e3..7d0388d7 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,11 @@ The `cirro-cli configure` command creates a file in `PW_HOME` called `config.ini You can set the `base_url` property in the config file rather than using the environment variable. +The `transfer_max_retries` configuration property specifies the maximum number of times to attempt uploading a file to Cirro in the event of a transfer failure. +When uploading files to Cirro, network issues or temporary outages can occasionally cause a transfer to fail. + ```ini [General] base_url = data-portal.io +transfer_max_retries = 15 ``` \ No newline at end of file diff --git a/cirro/api/config.py b/cirro/api/config.py index 33da7995..83a373ee 100644 --- a/cirro/api/config.py +++ b/cirro/api/config.py @@ -11,12 +11,14 @@ class Constants: home = os.environ.get('PW_HOME', '~/.cirro') config_path = Path(home, 'config.ini').expanduser() default_base_url = "data-portal.io" + default_max_retries = 10 class UserConfig(NamedTuple): auth_method: str auth_method_config: Dict # This needs to match the init params of the auth method base_url: Optional[str] + transfer_max_retries: Optional[int] def save_user_config(user_config: UserConfig): @@ -24,7 +26,8 @@ def save_user_config(user_config: UserConfig): ini_config = configparser.SafeConfigParser() ini_config['General'] = { 'auth_method': user_config.auth_method, - 'base_url': Constants.default_base_url + 'base_url': Constants.default_base_url, + 'transfer_max_retries': Constants.default_max_retries } if original_user_config: ini_config['General']['base_url'] = original_user_config.base_url @@ -45,6 +48,7 @@ def load_user_config() -> Optional[UserConfig]: main_config = ini_config['General'] auth_method = main_config.get('auth_method') base_url = main_config.get('base_url') + transfer_max_retries = main_config.getint('transfer_max_retries', Constants.default_max_retries) if auth_method and ini_config.has_section(auth_method): auth_method_config = dict(ini_config[auth_method]) @@ -54,7 +58,8 @@ def load_user_config() -> Optional[UserConfig]: return UserConfig( auth_method=auth_method, auth_method_config=auth_method_config, - base_url=base_url + base_url=base_url, + transfer_max_retries=transfer_max_retries ) except Exception: raise RuntimeError('Configuration load error, please re-run configuration') diff --git a/cirro/api/services/file.py b/cirro/api/services/file.py index 14f818fa..a62aac95 100644 --- a/cirro/api/services/file.py +++ b/cirro/api/services/file.py @@ -54,7 +54,8 @@ def upload_files(self, access_context: FileAccessContext, directory: str, files: :return: """ s3_client = S3Client(partial(self.get_access_credentials, access_context), self._configuration.region) - upload_directory(directory, files, s3_client, access_context.bucket, access_context.path_prefix) + upload_directory(directory, files, s3_client, access_context.bucket, access_context.path_prefix, + max_retries=self._configuration.user_config.transfer_max_retries) def download_files(self, access_context: FileAccessContext, directory: str, files: List[str]): """ From a6eb059748d4fda54116a2f252501448b89400e8 Mon Sep 17 00:00:00 2001 From: Nathan Thorpe Date: Wed, 19 Apr 2023 12:13:35 -0700 Subject: [PATCH 3/3] document pause behavior --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 7d0388d7..8be88f14 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,7 @@ You can set the `base_url` property in the config file rather than using the env The `transfer_max_retries` configuration property specifies the maximum number of times to attempt uploading a file to Cirro in the event of a transfer failure. When uploading files to Cirro, network issues or temporary outages can occasionally cause a transfer to fail. +It will pause for an increasing amount of time for each retry attempt. ```ini [General]