Skip to content

Commit 2a69415

Browse files
authored
[JDBC] Add retries with delay (#1517)
[JDBC] Add retries with delay. This change adds retries to the JDBC persistence layer. The retries use exponential backoff with jitter and are tunable in the following ways: a. max_retries: the total number of retries the persistence layer performs on connection-reset and serialization-failure exceptions before giving up. b. max_duration_in_ms: the time window in ms, measured from the first attempt, within which these retries may be performed. For example, with 500 ms configured, the total time spent retrying should not exceed 500 ms (optimistically). c. initial_delay_in_ms: the initial delay before the first retry.
1 parent 9bd2a70 commit 2a69415

File tree

16 files changed

+511
-167
lines changed

16 files changed

+511
-167
lines changed

bom/build.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ dependencies {
4646
api(project(":polaris-jpa-model"))
4747

4848
api(project(":polaris-quarkus-admin"))
49+
api(project(":polaris-quarkus-common"))
4950
api(project(":polaris-quarkus-test-commons"))
5051
api(project(":polaris-quarkus-defaults"))
5152
api(project(":polaris-quarkus-server"))

extension/persistence/relational-jdbc/src/main/java/org/apache/polaris/extension/persistence/relational/jdbc/DatasourceOperations.java

Lines changed: 143 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import static java.nio.charset.StandardCharsets.UTF_8;
2222

23+
import com.google.common.annotations.VisibleForTesting;
2324
import jakarta.annotation.Nonnull;
2425
import java.io.BufferedReader;
2526
import java.io.IOException;
@@ -30,20 +31,36 @@
3031
import java.sql.Statement;
3132
import java.util.ArrayList;
3233
import java.util.List;
34+
import java.util.Locale;
3335
import java.util.Objects;
36+
import java.util.Random;
37+
import java.util.concurrent.TimeUnit;
3438
import java.util.function.Consumer;
3539
import java.util.stream.Stream;
3640
import javax.sql.DataSource;
41+
import org.apache.polaris.core.persistence.EntityAlreadyExistsException;
3742
import org.apache.polaris.extension.persistence.relational.jdbc.models.Converter;
43+
import org.slf4j.Logger;
44+
import org.slf4j.LoggerFactory;
3845

3946
public class DatasourceOperations {
4047

48+
private static final Logger LOGGER = LoggerFactory.getLogger(DatasourceOperations.class);
49+
4150
private static final String CONSTRAINT_VIOLATION_SQL_CODE = "23505";
4251

52+
// POSTGRES RETRYABLE EXCEPTIONS
53+
private static final String SERIALIZATION_FAILURE_SQL_CODE = "40001";
54+
4355
private final DataSource datasource;
56+
private final RelationalJdbcConfiguration relationalJdbcConfiguration;
57+
58+
private final Random random = new Random();
4459

45-
public DatasourceOperations(DataSource datasource) {
60+
/**
 * Creates the JDBC helper.
 *
 * @param datasource source of the connections used by this instance
 * @param relationalJdbcConfiguration retry tuning (max retries, max duration, initial delay)
 */
public DatasourceOperations(
    DataSource datasource, RelationalJdbcConfiguration relationalJdbcConfiguration) {
  // Retry settings are only stored here; they are read on every withRetries call.
  this.relationalJdbcConfiguration = relationalJdbcConfiguration;
  this.datasource = datasource;
}
4865

4966
/**
@@ -121,22 +138,16 @@ public <T> void executeSelectOverStream(
121138
@Nonnull Converter<T> converterInstance,
122139
@Nonnull Consumer<Stream<T>> consumer)
123140
throws SQLException {
124-
try (Connection connection = borrowConnection();
125-
Statement statement = connection.createStatement();
126-
ResultSet resultSet = statement.executeQuery(query)) {
127-
ResultSetIterator<T> iterator = new ResultSetIterator<>(resultSet, converterInstance);
128-
consumer.accept(iterator.toStream());
129-
} catch (SQLException e) {
130-
throw e;
131-
} catch (RuntimeException e) {
132-
if (e.getCause() instanceof SQLException) {
133-
throw (SQLException) e.getCause();
134-
} else {
135-
throw e;
136-
}
137-
} catch (Exception e) {
138-
throw new RuntimeException(e);
139-
}
141+
withRetries(
142+
() -> {
143+
try (Connection connection = borrowConnection();
144+
Statement statement = connection.createStatement();
145+
ResultSet resultSet = statement.executeQuery(query)) {
146+
ResultSetIterator<T> iterator = new ResultSetIterator<>(resultSet, converterInstance);
147+
consumer.accept(iterator.toStream());
148+
return null;
149+
}
150+
});
140151
}
141152

142153
/**
@@ -147,16 +158,19 @@ public <T> void executeSelectOverStream(
147158
* @throws SQLException : Exception during Query Execution.
148159
*/
149160
public int executeUpdate(String query) throws SQLException {
150-
try (Connection connection = borrowConnection();
151-
Statement statement = connection.createStatement()) {
152-
boolean autoCommit = connection.getAutoCommit();
153-
connection.setAutoCommit(true);
154-
try {
155-
return statement.executeUpdate(query);
156-
} finally {
157-
connection.setAutoCommit(autoCommit);
158-
}
159-
}
161+
return withRetries(
162+
() -> {
163+
try (Connection connection = borrowConnection();
164+
Statement statement = connection.createStatement()) {
165+
boolean autoCommit = connection.getAutoCommit();
166+
connection.setAutoCommit(true);
167+
try {
168+
return statement.executeUpdate(query);
169+
} finally {
170+
connection.setAutoCommit(autoCommit);
171+
}
172+
}
173+
});
160174
}
161175

162176
/**
@@ -166,23 +180,113 @@ public int executeUpdate(String query) throws SQLException {
166180
* @throws SQLException : Exception caught during transaction execution.
167181
*/
168182
public void runWithinTransaction(TransactionCallback callback) throws SQLException {
169-
try (Connection connection = borrowConnection()) {
170-
boolean autoCommit = connection.getAutoCommit();
171-
connection.setAutoCommit(false);
172-
boolean success = false;
183+
withRetries(
184+
() -> {
185+
try (Connection connection = borrowConnection()) {
186+
boolean autoCommit = connection.getAutoCommit();
187+
boolean success = false;
188+
connection.setAutoCommit(false);
189+
try {
190+
try {
191+
try (Statement statement = connection.createStatement()) {
192+
success = callback.execute(statement);
193+
}
194+
} finally {
195+
if (success) {
196+
connection.commit();
197+
} else {
198+
connection.rollback();
199+
}
200+
}
201+
} finally {
202+
connection.setAutoCommit(autoCommit);
203+
}
204+
}
205+
return null;
206+
});
207+
}
208+
209+
private boolean isRetryable(SQLException e) {
210+
String sqlState = e.getSQLState();
211+
212+
if (sqlState != null) {
213+
return sqlState.equals(SERIALIZATION_FAILURE_SQL_CODE); // Serialization failure
214+
}
215+
216+
// Additionally, one might check for specific error messages or other conditions
217+
return e.getMessage().toLowerCase(Locale.ROOT).contains("connection refused")
218+
|| e.getMessage().toLowerCase(Locale.ROOT).contains("connection reset");
219+
}
220+
221+
// TODO: consider refactoring to use a retry library, in order to have fair retries
222+
// and more knobs for tuning retry pattern.
223+
@VisibleForTesting
224+
<T> T withRetries(Operation<T> operation) throws SQLException {
225+
int attempts = 0;
226+
// maximum number of retries.
227+
int maxAttempts = relationalJdbcConfiguration.maxRetries().orElse(1);
228+
// How long we should try, since the first attempt.
229+
long maxDuration = relationalJdbcConfiguration.maxDurationInMs().orElse(5000L);
230+
// How long to wait before first failure.
231+
long delay = relationalJdbcConfiguration.initialDelayInMs().orElse(100L);
232+
233+
// maximum time we will retry till.
234+
long maxRetryTime = TimeUnit.NANOSECONDS.toMillis(System.nanoTime()) + maxDuration;
235+
236+
while (attempts < maxAttempts) {
173237
try {
174-
try (Statement statement = connection.createStatement()) {
175-
success = callback.execute(statement);
176-
}
177-
} finally {
178-
if (success) {
179-
connection.commit();
238+
return operation.execute();
239+
} catch (SQLException | RuntimeException e) {
240+
SQLException sqlException;
241+
if (e instanceof RuntimeException) {
242+
// Handle Exceptions from ResultSet Iterator consumer, as it throws a RTE, ignore RTE from
243+
// the transactions.
244+
if (e.getCause() instanceof SQLException
245+
&& !(e instanceof EntityAlreadyExistsException)) {
246+
sqlException = (SQLException) e.getCause();
247+
} else {
248+
throw e;
249+
}
180250
} else {
181-
connection.rollback();
251+
sqlException = (SQLException) e;
182252
}
183-
connection.setAutoCommit(autoCommit);
253+
254+
attempts++;
255+
long timeLeft =
256+
Math.max((maxRetryTime - TimeUnit.NANOSECONDS.toMillis(System.nanoTime())), 0L);
257+
if (timeLeft == 0 || attempts >= maxAttempts || !isRetryable(sqlException)) {
258+
String exceptionMessage =
259+
String.format(
260+
"Failed due to %s, after , %s attempts and %s milliseconds",
261+
sqlException.getMessage(), attempts, maxDuration);
262+
throw new SQLException(
263+
exceptionMessage, sqlException.getSQLState(), sqlException.getErrorCode(), e);
264+
}
265+
// Add jitter
266+
long timeToSleep = Math.min(timeLeft, delay + (long) (random.nextFloat() * 0.2 * delay));
267+
LOGGER.debug(
268+
"Sleeping {} ms before retrying {} on attempt {} / {}, reason {}",
269+
timeToSleep,
270+
operation,
271+
attempts,
272+
maxAttempts,
273+
e.getMessage(),
274+
e);
275+
try {
276+
Thread.sleep(timeToSleep);
277+
} catch (InterruptedException ie) {
278+
Thread.currentThread().interrupt();
279+
throw new RuntimeException("Retry interrupted", ie);
280+
}
281+
delay *= 2; // Exponential backoff
184282
}
185283
}
284+
// This should never be reached
285+
return null;
286+
}
287+
288+
/**
 * A unit of JDBC work that produces a result and may fail with a {@link SQLException}.
 *
 * <p>Instances are passed to {@code withRetries}, which may call {@link #execute()} more than
 * once, so an implementation should be safe to run again after a failed attempt.
 *
 * @param <T> type of the value produced by the operation
 */
@FunctionalInterface
public interface Operation<T> {
  /**
   * Runs the operation once.
   *
   * @return the result of the operation; may be {@code null}
   * @throws SQLException if the underlying JDBC call fails
   */
  T execute() throws SQLException;
}
187291

188292
// Interface for transaction callback

extension/persistence/relational-jdbc/src/main/java/org/apache/polaris/extension/persistence/relational/jdbc/JdbcMetaStoreManagerFactory.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ public class JdbcMetaStoreManagerFactory implements MetaStoreManagerFactory {
7474

7575
@Inject PolarisStorageIntegrationProvider storageIntegrationProvider;
7676
@Inject Instance<DataSource> dataSource;
77+
@Inject RelationalJdbcConfiguration relationalJdbcConfiguration;
7778

7879
protected JdbcMetaStoreManagerFactory() {}
7980

@@ -108,7 +109,8 @@ private void initializeForRealm(
108109
}
109110

110111
private DatasourceOperations getDatasourceOperations(boolean isBootstrap) {
111-
DatasourceOperations databaseOperations = new DatasourceOperations(dataSource.get());
112+
DatasourceOperations databaseOperations =
113+
new DatasourceOperations(dataSource.get(), relationalJdbcConfiguration);
112114
if (isBootstrap) {
113115
try {
114116
DatabaseType databaseType;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.polaris.extension.persistence.relational.jdbc;
20+
21+
import java.util.Optional;
22+
23+
/** Tuning knobs for the retry behaviour of the relational JDBC persistence layer. */
public interface RelationalJdbcConfiguration {
  /**
   * Maximum number of retries before giving up.
   *
   * @return the configured limit, or empty if none was configured
   */
  Optional<Integer> maxRetries();

  /**
   * Maximum retry duration, in milliseconds.
   *
   * @return the configured duration, or empty if none was configured
   */
  Optional<Long> maxDurationInMs();

  /**
   * Initial retry delay, in milliseconds.
   *
   * @return the configured delay, or empty if none was configured
   */
  Optional<Long> initialDelayInMs();
}

0 commit comments

Comments
 (0)