Skip to content

Commit 04880f0

Browse files
HADOOP-13551. AWS metrics wire-up (#2778)
Moves to the builder API for AWS S3 client creation, and offers a similar style of API to the S3A FileSystem and tests, hiding the details of which options are client, which are in AWS Conf, and doing the wiring up of S3A statistics interfaces to the AWS SDK internals. S3A Statistics, including IOStatistics, should now count throttling events handled in the AWS SDK itself. This patch restores endpoint determination by probes to US-East-1 if the client isn't configured with fs.s3a.endpoint. Explicitly setting the endpoint will save the cost of these probe HTTP requests. Contributed by Steve Loughran.
1 parent 95e6892 commit 04880f0

File tree

11 files changed

+466
-220
lines changed

11 files changed

+466
-220
lines changed

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -160,14 +160,33 @@ private Constants() {
160160
DEFAULT_SSL_CHANNEL_MODE =
161161
DelegatingSSLSocketFactory.SSLChannelMode.Default_JSSE;
162162

163-
//use a custom endpoint?
163+
/**
164+
* Endpoint. For v4 signing and/or better performance,
165+
* this should be the specific endpoint of the region
166+
* in which the bucket is hosted.
167+
*/
164168
public static final String ENDPOINT = "fs.s3a.endpoint";
165169

166170
/**
167-
* Default value of s3 endpoint. If not set explicitly using
168-
* {@code AmazonS3#setEndpoint()}, this is used.
171+
* Default value of s3 endpoint: {@value}.
172+
* It tells the AWS client to work it out by asking the central
173+
* endpoint where the bucket lives, caching that
174+
* value in the client for the life of the process.
175+
* <p>
176+
* Note: previously this constant was defined as
177+
* {@link #CENTRAL_ENDPOINT}, however the actual
178+
* S3A client code used "" as the default when
179+
* {@link #ENDPOINT} was unset.
180+
* As core-default.xml also set the endpoint to "",
181+
* the empty string has long been the <i>real</i>
182+
* default value.
183+
*/
184+
public static final String DEFAULT_ENDPOINT = "";
185+
186+
/**
187+
* The central endpoint: {@value}.
169188
*/
170-
public static final String DEFAULT_ENDPOINT = "s3.amazonaws.com";
189+
public static final String CENTRAL_ENDPOINT = "s3.amazonaws.com";
171190

172191
//Enable path style access? Overrides default virtual hosting
173192
public static final String PATH_STYLE_ACCESS = "fs.s3a.path.style.access";

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java

Lines changed: 44 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,8 @@
2222
import java.net.URI;
2323

2424
import com.amazonaws.ClientConfiguration;
25-
import com.amazonaws.auth.AWSCredentialsProvider;
2625
import com.amazonaws.client.builder.AwsClientBuilder;
27-
import com.amazonaws.metrics.RequestMetricCollector;
26+
import com.amazonaws.handlers.RequestHandler2;
2827
import com.amazonaws.services.s3.AmazonS3;
2928
import com.amazonaws.services.s3.AmazonS3Client;
3029
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
@@ -41,27 +40,22 @@
4140
import org.apache.hadoop.classification.InterfaceStability;
4241
import org.apache.hadoop.conf.Configuration;
4342
import org.apache.hadoop.conf.Configured;
44-
import org.apache.hadoop.fs.s3a.statistics.StatisticsFromAwsSdk;
4543
import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector;
4644

4745
import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING;
48-
import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT;
4946
import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT;
50-
import static org.apache.hadoop.fs.s3a.Constants.PATH_STYLE_ACCESS;
5147

5248
/**
5349
* The default {@link S3ClientFactory} implementation.
5450
* This calls the AWS SDK to configure and create an
55-
* {@link AmazonS3Client} that communicates with the S3 service.
51+
* {@code AmazonS3Client} that communicates with the S3 service.
5652
*/
5753
@InterfaceAudience.Private
5854
@InterfaceStability.Unstable
5955
public class DefaultS3ClientFactory extends Configured
6056
implements S3ClientFactory {
6157

6258
private static final String S3_SERVICE_NAME = "s3";
63-
private static final String S3_SIGNER = "S3SignerType";
64-
private static final String S3_V4_SIGNER = "AWSS3V4SignerType";
6559

6660
/**
6761
* Subclasses refer to this.
@@ -70,22 +64,21 @@ public class DefaultS3ClientFactory extends Configured
7064
LoggerFactory.getLogger(DefaultS3ClientFactory.class);
7165

7266
/**
73-
* Create the client.
74-
* <p>
75-
* If the AWS stats are not null then a {@link AwsStatisticsCollector}.
76-
* is created to bind to the two.
77-
* <i>Important: until this binding works properly across regions,
78-
* this should be null.</i>
67+
* Create the client by preparing the AwsConf configuration
68+
* and then invoking {@code buildAmazonS3Client()}.
7969
*/
8070
@Override
81-
public AmazonS3 createS3Client(URI name,
82-
final String bucket,
83-
final AWSCredentialsProvider credentials,
84-
final String userAgentSuffix,
85-
final StatisticsFromAwsSdk statisticsFromAwsSdk) throws IOException {
71+
public AmazonS3 createS3Client(
72+
final URI uri,
73+
final S3ClientCreationParameters parameters) throws IOException {
8674
Configuration conf = getConf();
8775
final ClientConfiguration awsConf = S3AUtils
88-
.createAwsConf(conf, bucket, Constants.AWS_SERVICE_IDENTIFIER_S3);
76+
.createAwsConf(conf,
77+
uri.getHost(),
78+
Constants.AWS_SERVICE_IDENTIFIER_S3);
79+
// add any headers
80+
parameters.getHeaders().forEach((h, v) ->
81+
awsConf.addHeader(h, v));
8982

9083
// When EXPERIMENTAL_AWS_INTERNAL_THROTTLING is false
9184
// throttling is explicitly disabled on the S3 client so that
@@ -96,111 +89,62 @@ public AmazonS3 createS3Client(URI name,
9689
conf.getBoolean(EXPERIMENTAL_AWS_INTERNAL_THROTTLING,
9790
EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT));
9891

99-
if (!StringUtils.isEmpty(userAgentSuffix)) {
100-
awsConf.setUserAgentSuffix(userAgentSuffix);
92+
if (!StringUtils.isEmpty(parameters.getUserAgentSuffix())) {
93+
awsConf.setUserAgentSuffix(parameters.getUserAgentSuffix());
10194
}
102-
// optional metrics
103-
RequestMetricCollector metrics = statisticsFromAwsSdk != null
104-
? new AwsStatisticsCollector(statisticsFromAwsSdk)
105-
: null;
10695

107-
return newAmazonS3Client(
108-
credentials,
96+
return buildAmazonS3Client(
10997
awsConf,
110-
metrics,
111-
conf.getTrimmed(ENDPOINT, ""),
112-
conf.getBoolean(PATH_STYLE_ACCESS, false));
113-
}
114-
115-
/**
116-
* Create an {@link AmazonS3} client.
117-
* Override this to provide an extended version of the client
118-
* @param credentials credentials to use
119-
* @param awsConf AWS configuration
120-
* @param metrics metrics collector or null
121-
* @param endpoint endpoint string; may be ""
122-
* @param pathStyleAccess enable path style access?
123-
* @return new AmazonS3 client
124-
*/
125-
protected AmazonS3 newAmazonS3Client(
126-
final AWSCredentialsProvider credentials,
127-
final ClientConfiguration awsConf,
128-
final RequestMetricCollector metrics,
129-
final String endpoint,
130-
final boolean pathStyleAccess) {
131-
if (metrics != null) {
132-
LOG.debug("Building S3 client using the SDK builder API");
133-
return buildAmazonS3Client(credentials, awsConf, metrics, endpoint,
134-
pathStyleAccess);
135-
} else {
136-
LOG.debug("Building S3 client using the SDK builder API");
137-
return classicAmazonS3Client(credentials, awsConf, endpoint,
138-
pathStyleAccess);
139-
}
98+
parameters);
14099
}
141100

142101
/**
143-
* Use the (newer) Builder SDK to create a an AWS S3 client.
102+
* Use the Builder API to create an AWS S3 client.
144103
* <p>
145-
* This has a more complex endpoint configuration in a
146-
* way which does not yet work in this code in a way
147-
* which doesn't trigger regressions. So it is only used
148-
* when SDK metrics are supplied.
149-
* @param credentials credentials to use
104+
* This has a more complex endpoint configuration mechanism
105+
* which initially caused problems; the
106+
* {@code withForceGlobalBucketAccessEnabled(true)}
107+
* command is critical here.
150108
* @param awsConf AWS configuration
151-
* @param metrics metrics collector or null
152-
* @param endpoint endpoint string; may be ""
153-
* @param pathStyleAccess enable path style access?
109+
* @param parameters parameters
154110
* @return new AmazonS3 client
155111
*/
156-
private AmazonS3 buildAmazonS3Client(
157-
final AWSCredentialsProvider credentials,
112+
protected AmazonS3 buildAmazonS3Client(
158113
final ClientConfiguration awsConf,
159-
final RequestMetricCollector metrics,
160-
final String endpoint,
161-
final boolean pathStyleAccess) {
114+
final S3ClientCreationParameters parameters) {
162115
AmazonS3ClientBuilder b = AmazonS3Client.builder();
163-
b.withCredentials(credentials);
116+
b.withCredentials(parameters.getCredentialSet());
164117
b.withClientConfiguration(awsConf);
165-
b.withPathStyleAccessEnabled(pathStyleAccess);
166-
if (metrics != null) {
167-
b.withMetricsCollector(metrics);
118+
b.withPathStyleAccessEnabled(parameters.isPathStyleAccess());
119+
120+
if (parameters.getMetrics() != null) {
121+
b.withMetricsCollector(
122+
new AwsStatisticsCollector(parameters.getMetrics()));
123+
}
124+
if (parameters.getRequestHandlers() != null) {
125+
b.withRequestHandlers(
126+
parameters.getRequestHandlers().toArray(new RequestHandler2[0]));
127+
}
128+
if (parameters.getMonitoringListener() != null) {
129+
b.withMonitoringListener(parameters.getMonitoringListener());
168130
}
169131

170132
// endpoint set up is a PITA
171-
// client.setEndpoint("") is no longer available
172133
AwsClientBuilder.EndpointConfiguration epr
173-
= createEndpointConfiguration(endpoint, awsConf);
134+
= createEndpointConfiguration(parameters.getEndpoint(),
135+
awsConf);
174136
if (epr != null) {
175137
// an endpoint binding was constructed: use it.
176138
b.withEndpointConfiguration(epr);
139+
} else {
140+
// no idea what the endpoint is, so tell the SDK
141+
// to work it out at the cost of an extra HEAD request
142+
b.withForceGlobalBucketAccessEnabled(true);
177143
}
178144
final AmazonS3 client = b.build();
179145
return client;
180146
}
181147

182-
/**
183-
* Wrapper around constructor for {@link AmazonS3} client.
184-
* Override this to provide an extended version of the client.
185-
* <p>
186-
* This uses a deprecated constructor -it is currently
187-
* the only one which works for us.
188-
* @param credentials credentials to use
189-
* @param awsConf AWS configuration
190-
* @param endpoint endpoint string; may be ""
191-
* @param pathStyleAccess enable path style access?
192-
* @return new AmazonS3 client
193-
*/
194-
@SuppressWarnings("deprecation")
195-
private AmazonS3 classicAmazonS3Client(
196-
AWSCredentialsProvider credentials,
197-
ClientConfiguration awsConf,
198-
final String endpoint,
199-
final boolean pathStyleAccess) {
200-
final AmazonS3 client = new AmazonS3Client(credentials, awsConf);
201-
return configureAmazonS3Client(client, endpoint, pathStyleAccess);
202-
}
203-
204148
/**
205149
* Configure classic S3 client.
206150
* <p>
@@ -226,31 +170,6 @@ protected static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
226170
throw new IllegalArgumentException(msg, e);
227171
}
228172
}
229-
return applyS3ClientOptions(s3, pathStyleAccess);
230-
}
231-
232-
/**
233-
* Perform any tuning of the {@code S3ClientOptions} settings based on
234-
* the Hadoop configuration.
235-
* This is different from the general AWS configuration creation as
236-
* it is unique to S3 connections.
237-
* <p>
238-
* The {@link Constants#PATH_STYLE_ACCESS} option enables path-style access
239-
* to S3 buckets if configured. By default, the
240-
* behavior is to use virtual hosted-style access with URIs of the form
241-
* {@code http://bucketname.s3.amazonaws.com}
242-
* <p>
243-
* Enabling path-style access and a
244-
* region-specific endpoint switches the behavior to use URIs of the form
245-
* {@code http://s3-eu-west-1.amazonaws.com/bucketname}.
246-
* It is common to use this when connecting to private S3 servers, as it
247-
* avoids the need to play with DNS entries.
248-
* @param s3 S3 client
249-
* @param pathStyleAccess enable path style access?
250-
* @return the S3 client
251-
*/
252-
protected static AmazonS3 applyS3ClientOptions(AmazonS3 s3,
253-
final boolean pathStyleAccess) {
254173
if (pathStyleAccess) {
255174
LOG.debug("Enabling path style access!");
256175
s3.setS3ClientOptions(S3ClientOptions.builder()

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919
package org.apache.hadoop.fs.s3a;
2020

2121
import com.amazonaws.ClientConfiguration;
22-
import com.amazonaws.auth.AWSCredentialsProvider;
23-
import com.amazonaws.metrics.RequestMetricCollector;
2422
import com.amazonaws.services.s3.AmazonS3;
2523

2624
import org.apache.hadoop.classification.InterfaceAudience;
@@ -31,31 +29,25 @@
3129
* This client is for testing <i>only</i>; it is in the production
3230
* {@code hadoop-aws} module to enable integration tests to use this
3331
* just by editing the Hadoop configuration used to bring up the client.
32+
* <p>
33+
* The factory uses the older constructor-based instantiation/configuration
34+
* of the client, so does not wire up metrics, handlers etc.
3435
*/
3536
@InterfaceAudience.Private
3637
@InterfaceStability.Unstable
3738
public class InconsistentS3ClientFactory extends DefaultS3ClientFactory {
3839

39-
/**
40-
* Create the inconsistent client.
41-
* Logs a warning that this is being done.
42-
* @param credentials credentials to use
43-
* @param awsConf AWS configuration
44-
* @param metrics metric collector
45-
* @param endpoint AWS endpoint
46-
* @param pathStyleAccess should path style access be supported?
47-
* @return an inconsistent client.
48-
*/
4940
@Override
50-
protected AmazonS3 newAmazonS3Client(AWSCredentialsProvider credentials,
51-
ClientConfiguration awsConf,
52-
final RequestMetricCollector metrics,
53-
final String endpoint,
54-
final boolean pathStyleAccess) {
41+
protected AmazonS3 buildAmazonS3Client(
42+
final ClientConfiguration awsConf,
43+
final S3ClientCreationParameters parameters) {
5544
LOG.warn("** FAILURE INJECTION ENABLED. Do not run in production! **");
5645
InconsistentAmazonS3Client s3
57-
= new InconsistentAmazonS3Client(credentials, awsConf, getConf());
58-
configureAmazonS3Client(s3, endpoint, pathStyleAccess);
46+
= new InconsistentAmazonS3Client(
47+
parameters.getCredentialSet(), awsConf, getConf());
48+
configureAmazonS3Client(s3,
49+
parameters.getEndpoint(),
50+
parameters.isPathStyleAccess());
5951
return s3;
6052
}
6153
}

0 commit comments

Comments
 (0)