From 57990b27c782749f4c9e265a315315e27e0e4858 Mon Sep 17 00:00:00 2001 From: Dave Rigby Date: Wed, 15 May 2024 14:51:52 +0000 Subject: [PATCH] gRPC: Allow retries of up to MAX_MSG_SIZE gRPC has a built-in retry mechanism[1] which we configure to automatically retry on status UNAVAILABLE messages from Pinecone. However, it has been observed that the VectorService/Upsert method is _not_ being retried automatically and causes an exception to be thrown to the application: Traceback (most recent call last): File ".venv/lib/python3.11/site-packages/pinecone/grpc/base.py", line 150, in wrapped return func( ^^^^^ File ".venv/lib64/python3.11/site-packages/grpc/_channel.py", line 1181, in __call__ return _end_unary_response_blocking(state, call, False, None) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File ".venv/lib64/python3.11/site-packages/grpc/_channel.py", line 1006, in _end_unary_response_blocking raise _InactiveRpcError(state) # pytype: disable=not-instantiable ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with: status = StatusCode.UNAVAILABLE details = "unavailable" debug_error_string = "UNKNOWN:Error received from peer ipv4:34.223.120.220:443 {created_time:"2024-05-10T11:54:43.047741403+00:00", grpc_status:14, grpc_message:"unavailable"}" Enabling gRPC's tracing[2] by setting env vars 'GRPC_VERBOSITY=debug GRPC_TRACE=all' (warning - this is _very_ verbose!) highlighted that when we do get a StatusCode.UNAVAILABLE, retry is not considered as the request is too large ("committing" in this context means it effectively disables retry attempts): 0514 14:00:43.870499051 4093173 retry_filter_legacy_call_data.cc:1855] chand=0x7ff708006080 calld=0x56377b0b11e0: exceeded retry buffer size, committing As per gRPC's options[3], the max buffer size is controlled via: /** Per-RPC retry buffer size, in bytes. Default is 256 KiB. 
*/ #define GRPC_ARG_PER_RPC_RETRY_BUFFER_SIZE "grpc.per_rpc_retry_buffer_size" Given that Upsert messages are frequently larger than 256 KiB (it is common to batch up to the 2 MB limit), any batch larger than 256 KiB will never be retried. Address this by changing the retry buffer size to the same size as the maximum message we support (currently 128 MB, more than sufficient to retry any UpsertRequest). [1]: https://grpc.io/docs/guides/retry/ [2]: https://github.com/grpc/grpc/blob/master/doc/environment_variables.md [3]: https://github.com/grpc/grpc/blob/befeeba0f57c6ed3608935d8317fd26289e7e080/include/grpc/impl/channel_arg_names.h#L321 --- pinecone/grpc/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pinecone/grpc/base.py b/pinecone/grpc/base.py index 00a98e6b..01413024 100644 --- a/pinecone/grpc/base.py +++ b/pinecone/grpc/base.py @@ -98,6 +98,7 @@ def _gen_channel(self, options=None): "grpc.max_receive_message_length": MAX_MSG_SIZE, "grpc.service_config": self.method_config, "grpc.enable_retries": True, + "grpc.per_rpc_retry_buffer_size": MAX_MSG_SIZE, } if self.grpc_client_config.secure: default_options["grpc.ssl_target_name_override"] = target.split(":")[0]