Skip to content

Commit 4d40d84

Browse files
steveloughran authored and ahmarsuhail committed
HADOOP-18945. S3A. IAMInstanceCredentialsProvider failing. (apache#6202)
This restores asynchronous retrieval/refresh of any AWS credentials provided by the EC2 instance/container in which the process is running. Contributed by Steve Loughran.
1 parent a055004 commit 4d40d84

File tree

5 files changed

+220
-22
lines changed

5 files changed

+220
-22
lines changed

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@
8080
import static org.apache.hadoop.fs.s3a.impl.InstantiationIOException.isNotInstanceOf;
8181
import static org.apache.hadoop.fs.s3a.impl.InstantiationIOException.unsupportedConstructor;
8282
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.*;
83-
import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.maybeExtractNetworkException;
83+
import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.maybeExtractIOException;
8484
import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
8585
import static org.apache.hadoop.util.functional.RemoteIterators.filteringRemoteIterator;
8686

@@ -192,7 +192,7 @@ public static IOException translateException(@Nullable String operation,
192192
return ioe;
193193
}
194194
// network problems covered by an IOE inside the exception chain.
195-
ioe = maybeExtractNetworkException(path, exception);
195+
ioe = maybeExtractIOException(path, exception);
196196
if (ioe != null) {
197197
return ioe;
198198
}

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java

Lines changed: 81 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,37 +21,69 @@
2121
import java.io.Closeable;
2222
import java.io.IOException;
2323

24+
import org.slf4j.Logger;
25+
import org.slf4j.LoggerFactory;
2426
import software.amazon.awssdk.auth.credentials.AwsCredentials;
2527
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
2628
import software.amazon.awssdk.auth.credentials.ContainerCredentialsProvider;
29+
import software.amazon.awssdk.auth.credentials.HttpCredentialsProvider;
2730
import software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider;
2831
import software.amazon.awssdk.core.exception.SdkClientException;
2932

3033
import org.apache.hadoop.classification.InterfaceAudience;
3134
import org.apache.hadoop.classification.InterfaceStability;
3235

36+
import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.maybeExtractIOException;
37+
3338
/**
3439
* This is an IAM credential provider which wraps
3540
* an {@code ContainerCredentialsProvider}
3641
* to provide credentials when the S3A connector is instantiated on AWS EC2
3742
* or the AWS container services.
3843
* <p>
39-
* When it fails to authenticate, it raises a
40-
* {@link NoAwsCredentialsException} which can be recognized by retry handlers
44+
* The provider is initialized with async credential refresh enabled to be less
45+
* brittle against transient network issues.
46+
* <p>
47+
* If the ContainerCredentialsProvider fails to authenticate, then an instance of
48+
* {@link InstanceProfileCredentialsProvider} is created and attempted to
49+
* be used instead, again with async credential refresh enabled.
50+
* <p>
51+
* If both credential providers fail, a {@link NoAwsCredentialsException}
52+
* is thrown, which can be recognized by retry handlers
4153
* as a non-recoverable failure.
4254
* <p>
4355
* It is implicitly public; marked evolving as we can change its semantics.
44-
*
4556
*/
4657
@InterfaceAudience.Public
4758
@InterfaceStability.Evolving
4859
public class IAMInstanceCredentialsProvider
4960
implements AwsCredentialsProvider, Closeable {
5061

51-
private final AwsCredentialsProvider containerCredentialsProvider =
52-
ContainerCredentialsProvider.builder().build();
62+
private static final Logger LOG =
63+
LoggerFactory.getLogger(IAMInstanceCredentialsProvider.class);
64+
65+
/**
66+
* The credentials provider.
67+
* Initially a container credentials provider, but if that fails
68+
* fall back to the instance profile provider.
69+
*/
70+
private HttpCredentialsProvider iamCredentialsProvider;
71+
72+
/**
73+
* Is the container credentials provider in use?
74+
*/
75+
private boolean isContainerCredentialsProvider;
5376

77+
/**
78+
* Constructor.
79+
* Build credentials provider with async refresh,
80+
* mark {@link #isContainerCredentialsProvider} as true.
81+
*/
5482
public IAMInstanceCredentialsProvider() {
83+
isContainerCredentialsProvider = true;
84+
iamCredentialsProvider = ContainerCredentialsProvider.builder()
85+
.asyncCredentialUpdateEnabled(true)
86+
.build();
5587
}
5688

5789
/**
@@ -65,9 +97,16 @@ public AwsCredentials resolveCredentials() {
6597
try {
6698
return getCredentials();
6799
} catch (SdkClientException e) {
100+
101+
// if the exception contains an IOE, extract it
102+
// so its type is the immediate cause of this new exception.
103+
Throwable t = e;
104+
final IOException ioe = maybeExtractIOException("IAM endpoint", e);
105+
if (ioe != null) {
106+
t = ioe;
107+
}
68108
throw new NoAwsCredentialsException("IAMInstanceCredentialsProvider",
69-
e.getMessage(),
70-
e);
109+
e.getMessage(), t);
71110
}
72111
}
73112

@@ -78,23 +117,52 @@ public AwsCredentials resolveCredentials() {
78117
*
79118
* @return credentials
80119
*/
81-
private AwsCredentials getCredentials() {
120+
private synchronized AwsCredentials getCredentials() {
82121
try {
83-
return containerCredentialsProvider.resolveCredentials();
122+
return iamCredentialsProvider.resolveCredentials();
84123
} catch (SdkClientException e) {
85-
return InstanceProfileCredentialsProvider.create().resolveCredentials();
124+
LOG.debug("Failed to get credentials from container provider,", e);
125+
if (isContainerCredentialsProvider) {
126+
// create instance profile provider
127+
LOG.debug("Switching to instance provider", e);
128+
129+
// close it to shut down any thread
130+
iamCredentialsProvider.close();
131+
isContainerCredentialsProvider = false;
132+
iamCredentialsProvider = InstanceProfileCredentialsProvider.builder()
133+
.asyncCredentialUpdateEnabled(true)
134+
.build();
135+
return iamCredentialsProvider.resolveCredentials();
136+
} else {
137+
// already using instance profile provider, so fail
138+
throw e;
139+
}
140+
86141
}
87142
}
88143

144+
/**
145+
* Is this a container credentials provider?
146+
* @return true if the container credentials provider is in use;
147+
* false for InstanceProfileCredentialsProvider
148+
*/
149+
public boolean isContainerCredentialsProvider() {
150+
return isContainerCredentialsProvider;
151+
}
152+
89153
@Override
90-
public void close() throws IOException {
91-
// no-op.
154+
public synchronized void close() throws IOException {
155+
// this should always be true, but check just for safety...
156+
if (iamCredentialsProvider != null) {
157+
iamCredentialsProvider.close();
158+
}
92159
}
93160

94161
@Override
95162
public String toString() {
96163
return "IAMInstanceCredentialsProvider{" +
97-
"containerCredentialsProvider=" + containerCredentialsProvider +
164+
"credentialsProvider=" + iamCredentialsProvider +
165+
", isContainerCredentialsProvider=" + isContainerCredentialsProvider +
98166
'}';
99167
}
100168
}

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ public static boolean isObjectNotFound(AwsServiceException e) {
7979
* @param thrown exception
8080
* @return a translated exception or null.
8181
*/
82-
public static IOException maybeExtractNetworkException(String path, Throwable thrown) {
82+
public static IOException maybeExtractIOException(String path, Throwable thrown) {
8383

8484
if (thrown == null) {
8585
return null;
@@ -100,7 +100,9 @@ public static IOException maybeExtractNetworkException(String path, Throwable th
100100
// as a new instance is created through reflection, the
101101
// class of the returned instance will be that of the innermost,
102102
// unless no suitable constructor is available.
103-
return wrapWithInnerIOE(path, thrown, (IOException) cause);
103+
final IOException ioe = (IOException) cause;
104+
105+
return wrapWithInnerIOE(path, thrown, ioe);
104106

105107
}
106108

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.fs.s3a.auth;
20+
21+
import java.io.IOException;
22+
23+
import org.assertj.core.api.Assertions;
24+
import org.junit.Test;
25+
import org.slf4j.Logger;
26+
import org.slf4j.LoggerFactory;
27+
import software.amazon.awssdk.auth.credentials.AwsCredentials;
28+
29+
import org.apache.hadoop.test.AbstractHadoopTestBase;
30+
31+
/**
32+
* Unit tests for IAMInstanceCredentials provider.
33+
* This is a bit tricky as we don't want to require running in EC2,
34+
* but nor do we want a test which doesn't work in EC2.
35+
*/
36+
public class TestIAMInstanceCredentialsProvider extends AbstractHadoopTestBase {
37+
38+
private static final Logger LOG =
39+
LoggerFactory.getLogger(TestIAMInstanceCredentialsProvider.class);
40+
41+
/**
42+
* Error string from
43+
* software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider,
44+
* if IAM resolution has been disabled: {@value}.
45+
*/
46+
public static final String DISABLED =
47+
"IMDS credentials have been disabled by environment variable or system property";
48+
49+
/**
50+
* Test an immediate create/close.
51+
*/
52+
@Test
53+
public void testIAMInstanceCredentialsProviderClose() throws Throwable {
54+
new IAMInstanceCredentialsProvider().close();
55+
}
56+
57+
/**
58+
* Test instantiation.
59+
* Multiple outcomes depending on host setup.
60+
* <ol>
61+
* <li> In EC2: credentials resolved.
62+
* Assert the credentials come with a key.</li>
63+
* <li> Not in EC2: NoAwsCredentialsException wraps network error trying
64+
* to talk to the service.
65+
* Assert wrapped exception is an IOE.</li>
66+
* <li> IMDS resolution disabled by env var/sysprop.
67+
* NoAwsCredentialsException raised doesn't contain an IOE.
68+
* Require the message to contain the {@link #DISABLED} text.</li>
69+
* </ol>
70+
*/
71+
@Test
72+
public void testIAMInstanceCredentialsInstantiate() throws Throwable {
73+
try (IAMInstanceCredentialsProvider provider = new IAMInstanceCredentialsProvider()) {
74+
try {
75+
final AwsCredentials credentials = provider.resolveCredentials();
76+
// if we get here this test suite is running in a container/EC2
77+
LOG.info("Credentials: retrieved from {}: key={}",
78+
provider.isContainerCredentialsProvider() ? "container" : "EC2",
79+
credentials.accessKeyId());
80+
Assertions.assertThat(credentials.accessKeyId())
81+
.describedAs("Access key from IMDS")
82+
.isNotBlank();
83+
84+
// and if we get here, so does a second call
85+
provider.resolveCredentials();
86+
} catch (NoAwsCredentialsException expected) {
87+
// this is expected if the test is not running in a container/EC2
88+
LOG.info("Not running in a container/EC2");
89+
LOG.info("Exception raised", expected);
90+
// and we expect to have fallen back to InstanceProfileCredentialsProvider
91+
Assertions.assertThat(provider.isContainerCredentialsProvider())
92+
.describedAs("%s: shoud be using InstanceProfileCredentialsProvider")
93+
.isFalse();
94+
final Throwable cause = expected.getCause();
95+
if (cause == null) {
96+
throw expected;
97+
}
98+
if (!(cause instanceof IOException)
99+
&& !cause.toString().contains(DISABLED)) {
100+
throw new AssertionError("Cause not a IOException", cause);
101+
}
102+
}
103+
}
104+
}
105+
106+
107+
}

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestErrorTranslation.java

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,10 @@
1919
package org.apache.hadoop.fs.s3a.impl;
2020

2121
import java.io.IOException;
22+
import java.io.UncheckedIOException;
2223
import java.net.ConnectException;
2324
import java.net.NoRouteToHostException;
25+
import java.net.SocketTimeoutException;
2426
import java.net.UnknownHostException;
2527
import java.util.Collections;
2628

@@ -31,9 +33,10 @@
3133
import software.amazon.awssdk.core.retry.RetryPolicyContext;
3234

3335
import org.apache.hadoop.fs.PathIOException;
36+
import org.apache.hadoop.fs.s3a.auth.NoAwsCredentialsException;
3437
import org.apache.hadoop.test.AbstractHadoopTestBase;
3538

36-
import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.maybeExtractNetworkException;
39+
import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.maybeExtractIOException;
3740
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
3841
import static org.junit.Assert.assertTrue;
3942

@@ -64,7 +67,7 @@ public void testUnknownHostExceptionExtraction() throws Throwable {
6467
new UnknownHostException("bottom")));
6568
final IOException ioe = intercept(UnknownHostException.class, "top",
6669
() -> {
67-
throw maybeExtractNetworkException("", thrown);
70+
throw maybeExtractIOException("", thrown);
6871
});
6972

7073
// the wrapped exception is the top level one: no stack traces have
@@ -79,7 +82,7 @@ public void testUnknownHostExceptionExtraction() throws Throwable {
7982
public void testNoRouteToHostExceptionExtraction() throws Throwable {
8083
intercept(NoRouteToHostException.class, "top",
8184
() -> {
82-
throw maybeExtractNetworkException("p2",
85+
throw maybeExtractIOException("p2",
8386
sdkException("top",
8487
sdkException("middle",
8588
new NoRouteToHostException("bottom"))));
@@ -90,17 +93,35 @@ public void testNoRouteToHostExceptionExtraction() throws Throwable {
9093
public void testConnectExceptionExtraction() throws Throwable {
9194
intercept(ConnectException.class, "top",
9295
() -> {
93-
throw maybeExtractNetworkException("p1",
96+
throw maybeExtractIOException("p1",
9497
sdkException("top",
9598
sdkException("middle",
9699
new ConnectException("bottom"))));
97100
});
98101
}
102+
103+
/**
104+
* When there is an UncheckedIOException, its inner class is
105+
* extracted.
106+
*/
107+
@Test
108+
public void testUncheckedIOExceptionExtraction() throws Throwable {
109+
intercept(SocketTimeoutException.class, "top",
110+
() -> {
111+
final SdkClientException thrown = sdkException("top",
112+
sdkException("middle",
113+
new UncheckedIOException(
114+
new SocketTimeoutException("bottom"))));
115+
throw maybeExtractIOException("p1",
116+
new NoAwsCredentialsException("IamProvider", thrown.toString(), thrown));
117+
});
118+
}
119+
99120
@Test
100121
public void testNoConstructorExtraction() throws Throwable {
101122
intercept(PathIOException.class, NoConstructorIOE.MESSAGE,
102123
() -> {
103-
throw maybeExtractNetworkException("p1",
124+
throw maybeExtractIOException("p1",
104125
sdkException("top",
105126
sdkException("middle",
106127
new NoConstructorIOE())));

0 commit comments

Comments
 (0)